You are viewing an old version of this page. View the current version.

Compare with Current View Page History

« Previous Version 4 Next »

The Job Summarizer Executor can be configured using the REST API.

Create Job Summarizer Executor


Field

Required

Default

Multiple

Notes

Example
typeYes-No

The value must be "application".

"application"

_typeYes-No

The value must be "application".

"application"

appNameYes-NoThe name of the application"Job-Executor"
appTypeYes-NoThe value must be "job-summarize-executor"."job-summarize-executor"
configYes-NoThe value must be "com.accenture.aspire:app-jobsummarize-executor"."com.accenture.aspire:app-jobsummarize-executor"
descriptionYes-NoThe description

"Job-Executor"

propertiesYes-NoConfiguration object
dataPathYes-NoThe path of the job that contains the tables data"/doc"
containerPathYes-NoThe sub path of the data that contains each table"container"
tableIdPathYes-NoThe sub path of table data that contains the table id"container/url"
seedIdPathYes-NoThe sub path of table data that contains the seed id"container/seed/id"
columnsPathYes-NoThe sub path of table objects that contains the columns information"dataProfile/columns"
columnNamePathYes-NoThe sub path of column objects that contains the column name"columnName"
columnTypePathYes-NoThe sub path of column objects that contains the column type"column_type"
columnsPatternsYes[]YesThe columns patterns to detect each column type[{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}]
typeYes"TEXT"NoThe data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN""TEXT"
patternYes-NoThe pattern to match"STRING"






logFrequencyYes1000NoThe frequency for reporting the processed rows.1000
filterRowsYesfalseNoEnable to filter the rows to process.true
useFilterFileYestrueNoEnable to use a groovy file to filter the rowstrue
groovyPathNo-NoThe path of the Groovy script that contains the filter logic. The script must return a boolean value; if true, the row will be filtered"C:\\Aspire\\config\\rowsGroovyFilter.txt"
groovyScriptNo-NoScript used to filter the rows. It must return a boolean value; if true, the row will be filtered"row.getBoolean(\"sensitive\") == true"
urlYes-NoServer URL"http://localhost:9200/"
authTypeYes"none"NoThe authentication type. Accepted values: "none", "basic", "aws""none"
usernameNo-NoUser with the permissions to read from the Elastic index specified. Used only if the authType is "basic""admin"
passwordNo-NoThe password for the specified user. Used only if the authType is "basic""password"
regionNo-NoAWS region. Used only if the authType is "aws""us-east-2"
useCredentialsProviderChainNofalseNoUse AWS Credentials Provider Chain. Used only if the authType is "aws"true
accessKeyNo-NoKey utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false"AKIAIOSFODNN7EXAMPLE"
secretKeyNo-NoSecret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
assumeRoleNofalseNoEnable to assume the specified role to get the credentials. Used only if the authType is "aws"true
roleArnNo-NoThe Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true"arn:aws:iam::123456789012:user/group/role"
indexYes-NoThe elastic index to use"values-index"
queryYes-NoThe query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query."{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}"
uniqueValuesYestrueNoIf enabled, the expected row format is the one used for unique values; otherwise, the _source content is used as the row bodytrue
scrollTimeYes"5m"NoThe time to keep each scroll request active"5m"
idleConnectionTimeoutYes3600000NoMaximum time (in milliseconds) to keep an idle connection open3600000
maxConnectionsYes100NoMaximum number of connections to be opened100
maxConnectionsPerRouteYes10NoMaximum number of connections opened for the same target10
connectionTimeoutYes15000NoMaximum time (in milliseconds) to wait for the connection15000
socketTimeoutYes15000NoMaximum time (in milliseconds) to wait for a socket response15000
useThrottlingYesfalseNoFlag to enable connection throttlingtrue
throttlingRateNo5000NoTime period (in milliseconds) to throttle the connection. Used only if useThrottling is true5000
throttlingConnectionRateNo500NoMaximum number of connections used during the throttling period. Used only if useThrottling is true500
maxRetriesYes3NoMaximum number of retries for a failed document3
retryWaitTimeYes5000NoTime (in milliseconds) to wait before a retry5000

Example

POST /aspire/_api/workflows/{workflow}/rules
{
  "type": "application",
  "_type": "application",
  "appName": "Job_Summarize_Executor",
  "appType": "job-summarize-executor",
  "config": "com.accenture.aspire:app-jobsummarize-executor",
  "description": "job-summarizer",
  "properties": {
    "dataPath": "/doc",
    "containerPath": "container",
    "tableIdPath": "container/url",
    "seedIdPath": "container/seed/id",
    "columnsPath": "dataProfile/columns",
    "columnNamePath": "columnName",
    "columnTypePath": "column_type",
    "columnsPatterns": [{
        "type": "TEXT",
        "pattern": "STRING"
      }, {
        "type": "INT",
        "pattern": "INT32"
      }
    ],
    "logFrequency": 1000,
    "filterRows": false,
    "debug": false,
    "url": "http://localhost:9200/",
    "authType": "none",
    "index": "parquet-data",
    "query": "{\n  \"query\": {\n    \"bool\": {\n      \"must\": [{\n          \"term\": {\n            \"name.keyword\": {\n              \"value\": \"column-value\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.seedId\": {\n              \"value\": \"${seedId}\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.tableId\": {\n              \"value\": \"${tableId}\"\n            }\n          }\n        }\n      ]\n    }\n  }\n}\n",
    "uniqueValues": true,
    "scrollTime": "5m",
    "idleConnectionTimeout": 3600000,
    "maxConnections": 100,
    "maxConnectionsPerRoute": 10,
    "connectionTimeout": 15000,
    "socketTimeout": 15000,
    "useThrottling": false,
    "maxRetries": 3,
    "retryWaitTime": 5000
  }
}

Update Job Summarizer Executor


Field

Required

Default

Multiple

Notes

Example
idYes-NoId of the application to update"61014782-442a-4587-ab85-ba1439a7f7b5"
typeYes-No

The value must be "application".

"application"

_typeYes-No

The value must be "application".

"application"

appNameYes-NoThe name of the application"Job-Executor"
appTypeYes-NoThe value must be "job-summarize-executor"."job-summarize-executor"
configYes-NoThe value must be "com.accenture.aspire:app-jobsummarize-executor"."com.accenture.aspire:app-jobsummarize-executor"
descriptionYes-NoThe description

"Job-Executor"

propertiesYes-NoConfiguration object
dataPathYes-NoThe path of the job that contains the tables data"/doc"
containerPathYes-NoThe sub path of the data that contains each table"container"
tableIdPathYes-NoThe sub path of table data that contains the table id"container/url"
seedIdPathYes-NoThe sub path of table data that contains the seed id"container/seed/id"
columnsPathYes-NoThe sub path of table objects that contains the columns information"dataProfile/columns"
columnNamePathYes-NoThe sub path of column objects that contains the column name"columnName"
columnTypePathYes-NoThe sub path of column objects that contains the column type"column_type"
columnsPatternsYes[]YesThe columns patterns to detect each column type[{"type":"TEXT","pattern":"STRING"},{"type":"INT","pattern":"INT32"}]
typeYes"TEXT"NoThe data type to use for the specified pattern. Accepted values: "TEXT", "LONG", "INT", "FLOAT", "DOUBLE", "BOOLEAN""TEXT"
patternYes-NoThe pattern to match"STRING"






logFrequencyYes1000NoThe frequency for reporting the processed rows.1000
filterRowsYesfalseNoEnable to filter the rows to process.true
useFilterFileYestrueNoEnable to use a groovy file to filter the rowstrue
groovyPathNo-NoThe path of the Groovy script that contains the filter logic. The script must return a boolean value; if true, the row will be filtered"C:\\Aspire\\config\\rowsGroovyFilter.txt"
groovyScriptNo-NoScript used to filter the rows. It must return a boolean value; if true, the row will be filtered"row.getBoolean(\"sensitive\") == true"
urlYes-NoServer URL"http://localhost:9200/"
authTypeYes"none"NoThe authentication type. Accepted values: "none", "basic", "aws""none"
usernameNo-NoUser with the permissions to read from the Elastic index specified. Used only if the authType is "basic""admin"
passwordNo-NoThe password for the specified user. Used only if the authType is "basic""password"
regionNo-NoAWS region. Used only if the authType is "aws""us-east-2"
useCredentialsProviderChainNofalseNoUse AWS Credentials Provider Chain. Used only if the authType is "aws"true
accessKeyNo-NoKey utilized to access Amazon Web Services (AWS). Used only if the authType is "aws" and if useCredentialsProviderChain is false"AKIAIOSFODNN7EXAMPLE"
secretKeyNo-NoSecret key for the access key. Used only if the authType is "aws" and if useCredentialsProviderChain is false"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
assumeRoleNofalseNoEnable to assume the specified role to get the credentials. Used only if the authType is "aws"true
roleArnNo-NoThe Role ARN to assume. Used only if the authType is "aws" and if assumeRole is true"arn:aws:iam::123456789012:user/group/role"
indexYes-NoThe elastic index to use"values-index"
queryYes-NoThe query for fetching the unique values. The placeholders ${seedId} and ${tableId} can be used in the query."{\"query\":{\"bool\":{\"must\":[{\"term\":{\"name.keyword\":{\"value\":\"column-value\"}}},{\"term\":{\"value.seedId\":{\"value\":\"${seedId}\"}}},{\"term\":{\"value.tableId\":{\"value\":\"${tableId}\"}}}]}}}"
uniqueValuesYestrueNoIf enabled, the expected row format is the one used for unique values; otherwise, the _source content is used as the row bodytrue
scrollTimeYes"5m"NoThe time to keep each scroll request active"5m"
idleConnectionTimeoutYes3600000NoMaximum time (in milliseconds) to keep an idle connection open3600000
maxConnectionsYes100NoMaximum number of connections to be opened100
maxConnectionsPerRouteYes10NoMaximum number of connections opened for the same target10
connectionTimeoutYes15000NoMaximum time (in milliseconds) to wait for the connection15000
socketTimeoutYes15000NoMaximum time (in milliseconds) to wait for a socket response15000
useThrottlingYesfalseNoFlag to enable connection throttlingtrue
throttlingRateNo5000NoTime period (in milliseconds) to throttle the connection. Used only if useThrottling is true5000
throttlingConnectionRateNo500NoMaximum number of connections used during the throttling period. Used only if useThrottling is true500
maxRetriesYes3NoMaximum number of retries for a failed document3
retryWaitTimeYes5000NoTime (in milliseconds) to wait before a retry5000

Example

PUT /aspire/_api/workflows/{workflow}/rules/{id}
{
  "id": "951cf9a0-6078-43f2-bce1-6e377fc22fc5",
  "type": "application",
  "_type": "application",
  "appName": "Job_Summarize_Executor",
  "appType": "job-summarize-executor",
  "config": "com.accenture.aspire:app-jobsummarize-executor",
  "description": "job-summarizer",
  "properties": {
    "dataPath": "/doc",
    "containerPath": "container",
    "tableIdPath": "container/url",
    "seedIdPath": "container/seed/id",
    "columnsPath": "dataProfile/columns",
    "columnNamePath": "columnName",
    "columnTypePath": "column_type",
    "columnsPatterns": [{
        "type": "TEXT",
        "pattern": "STRING"
      }, {
        "type": "INT",
        "pattern": "INT32"
      }
    ],
    "logFrequency": 1000,
    "filterRows": false,
    "debug": false,
    "url": "http://localhost:9200/",
    "authType": "none",
    "index": "parquet-data",
    "query": "{\n  \"query\": {\n    \"bool\": {\n      \"must\": [{\n          \"term\": {\n            \"name.keyword\": {\n              \"value\": \"column-value\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.seedId\": {\n              \"value\": \"${seedId}\"\n            }\n          }\n        }, {\n          \"term\": {\n            \"value.tableId\": {\n              \"value\": \"${tableId}\"\n            }\n          }\n        }\n      ]\n    }\n  }\n}\n",
    "uniqueValues": true,
    "scrollTime": "5m",
    "idleConnectionTimeout": 3600000,
    "maxConnections": 100,
    "maxConnectionsPerRoute": 10,
    "connectionTimeout": 15000,
    "socketTimeout": 15000,
    "useThrottling": false,
    "maxRetries": 3,
    "retryWaitTime": 5000
  }
}
  • No labels