Field | Required | Default | Multiple | Notes | Example | ||||
---|---|---|---|---|---|---|---|---|---|
type | Yes | - | No | The value must be "application". | "application" | ||||
_type | Yes | - | No | The value must be "application". | "application" | ||||
appName | Yes | - | No | The name of the application | "Xml-Executor" | ||||
appType | Yes | - | No | The value must be "xml-summarize-executor". | "xml-summarize-executor" | ||||
config | Yes | - | No | The value must be "com.accenture.aspire:app-xmlsummarize-executor". | "com.accenture.aspire:app-xmlsummarize-executor" | ||||
description | Yes | - | No | The description | "Xml-Executor" | ||||
properties | Yes | - | No | Configuration object | |||||
rootNode | Yes | "/" | No | The root node which contains the sub-jobs to publish. | "/path/rootNode/" | ||
characterEncoding | No | "UTF-8" | No | The character encoding of the XML file to be read, if not UTF-8. | "UTF-8" | ||||
cleanse | No | true | No | Enable if you want to clean the XML content from non-readable characters. | ASCII code 15 |
honorDTD | No | true | No | Fetch XML's DTD. | true | ||||
limitNested | No | false | No | Limit how many levels in nested structures should be flattened. | false | ||||
maxLevel | No | 10 | No | The maximum nested level to flatten. | 10 | ||||
limitArrays | No | false | No | Limit how many entries in array structures should be processed. | false | ||||
arraysLimit | No | 10 | No | The maximum number of array entries to process. | 10 | ||||
debug | No | false | No | Debug messages will be enabled. | false | ||||
threadPool | No | 5 | No | The number of threads to use for parallel processing. | 5 | ||
logFrequency | No | 5 | No | The frequency for reporting the processed rows. | 5 | ||||
useSampling | No | false | No | Process only a random sample of the table rows. This option could increase the memory usage. | false | ||||
minimumSamples | No | 10 | No | The minimum number of random samples that will be gathered from the table. | 10 | ||||
maxSamples | No | 2000 | No | The maximum number of random samples that will be gathered from the table. | 2000 | ||||
minimumPercent | No | 0.35 | No | The minimum percentage of the total rows to process from the table. | 0.35 | ||||
limitRows | No | false | No | Limit how many rows from the table will be read. | false | ||||
maxRowsToRead | No | 10 | No | The maximum number of rows from the table that will be read. | 10 | ||||
filterRows | No | false | No | Check to filter the rows to process. | false | ||||
useFilterFile | No | true | No | Enable to use a groovy file to filter the rows. | true | ||||
useScriptFile | No | true | No | Enable to specify a script file or disable to specify an uploaded resource file. | true | ||||
groovyPath | Yes | - | No | The path of the groovy script that contains the filter logic. It must return a boolean value. If true, the row will be filtered. | "C:\\Aspire\\config\\rowsGroovyFilter.txt" | ||||
groovyScript | No | - | No | Script used to filter the rows. It must return a boolean value. If true, the row will be filtered. | "row.getBoolean(\"sensitive\") == true" |
Code Block | ||||
---|---|---|---|---|
| ||||
{ "type": "application", "_type": "application", "description": "Xml-Executor", "config": "com.accenture.aspire:app-xmlsummarize-executor", "appType": "xml-summarize-executor", "appName": "Xml Summarize Executor", "properties": { "rootNode": "/", "characterEncoding": "UTF-8", "cleanse": true, "honorDTD": true, "limitNested": false, "limitArrays": false, "debug": false, "threadPool": 5, "logFrequency": 5, "useSampling": false, "filterRows": false, "useFilterFile": false, "groovyScript": "// This script must return a boolean.\n// The references of the job, doc, component, row and table objects are available.\n// Javadoc references \n// Row (row) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Row.html\n// Table (table) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Table.html\nrow.getBoolean(\"sensitive\") == true" } } |
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
type | Yes | - | No | The value must be "application". | "application" |
_type | Yes | - | No | The value must be "application". | "application" |
appName | Yes | - | No | The name of the application | "Xml-Executor" | ||||
appType | Yes | - | No | The value must be "xml-summarize-executor". | "xml-summarize-executor" | ||||
config | Yes | - | No | The value must be "com.accenture.aspire:app-xmlsummarize-executor". | "com.accenture.aspire:app-xmlsummarize-executor" | |||||
description | Yes | - | No | The description | "Xml-Executor" | ||||
properties | Yes | - | No | Configuration object |
---|
rootNode | Yes | "/" | No | The root node which contains the sub-jobs to publish. | "/path/rootNode/" | ||||
characterEncoding | No | "UTF-8" | No | The character encoding of the XML file to be read, if not UTF-8. | "UTF-8" |
cleanse | No | true | No | Enable if you want to clean the XML content from non-readable characters. | ASCII code 15 | ||||
honorDTD | No | true | No | Fetch XML's DTD. | true |
limitNested | No | false | No | Limit how many levels in nested structures should be flattened. | false |
maxLevel | No | 10 | No | The maximum nested level to flatten. | 10 |
limitArrays | No | false | No | Limit how many entries in array structures should be processed. | false |
arraysLimit | No | 10 | No | The maximum number of array entries to process. | 10 |
debug | No | false | No | Debug messages will be enabled. | false |
threadPool | No | 5 | No | The number of threads to use for parallel processing. | 5 | |
logFrequency | No | 5 | No | The frequency for reporting the processed rows. | 5 | |
useSampling | No | false | No | Process only a random sample of the table rows. This option could increase the memory usage. | false | ||||
minimumSamples | No | 10 | No | The minimum number of random samples that will be gathered from the table. | 10 |
maxSamples | No | 2000 | No | The maximum number of random samples that will be gathered from the table. | 2000 |
minimumPercent | No | 0.35 | No | The minimum percentage of the total rows to process from table. | 0.35 |
limitRows | No | false | No | Limit how many rows from the table will be read. | false |
maxRowsToRead | No | 10 | No | The maximum number of rows from the table that will be read. | 10 |
filterRows | No | false | No | Check to filter the rows to process. | false |
useFilterFile | No | true | No | Enable to use a groovy file to filter the rows. | true | ||||
useScriptFile | No | true | No | Enable to specify a script file or disable to specify an uploaded resource file. | true |
groovyPath | Yes | - | No | The path of the groovy script that contains the filter logic. It must return a boolean value. If true, the row will be filtered. | "C:\\Aspire\\config\\rowsGroovyFilter.txt" | |
groovyScript | No | - | No | Script used to filter the rows. It must return a boolean value. If true, the row will be filtered. | "row.getBoolean(\"sensitive\") == true" |
Code Block | ||||
---|---|---|---|---|
| ||||
{ "id": "61014782-442a-4587-ab85-ba1439a7f7b5", "type": "application", "_type": "application", "description": "Xml-Executor", "config": "com.accenture.aspire:app-xmlsummarize-executor", "appType": "xml-summarize-executor", "appName": "Xml Summarize Executor", "properties": { "rootNode": "/", "characterEncoding": "UTF-8", "cleanse": true, "honorDTD": true, "limitNested": true, "maxLevel": 10, "limitArrays": true, "arraysLimit": 10, "debug": true, "threadPool": 5, "logFrequency": 5, "useSampling": true, "minimumSamples": 10, "maxSamples": 2000, "minimumPercent": 0.35, "limitRows": true, "maxRowsToRead": 10, "filterRows": true, "useFilterFile": false, "groovyScript": "// This script must return a boolean.\n// The references of the job, doc, component, row and table objects are available.\n// Javadoc references \n// Row (row) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Row.html\n// Table (table) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Table.html\nrow.getBoolean(\"sensitive\") == true" } } |