The Job Summarizer Executor is able to process the table data contained in an Aspire job and fetch the associated rows from an Elasticsearch index. Each extracted row will be processed by the summarizers attached to the job.
The Job Summarizer Executor summarizes data based on the table structure contained in a job.
Example of a supported table structure:
{ "container": { "repItemType": "aspire/folder", "seed": { "description": "s3", "id": "a8c0c88a-d3b4-42fb-b27d-57137ab85154", "type": "s3", "properties": { "tag1": "value1", "seed": "/qa-s3-storage/test-level1/split container/", "processSplitFiles": "true", "usePrefixesForSplitCheck": "true", "splitCheckPrefix": "part-" }, "tags": [ "darwin" ] }, "isContainer": "TYPE-NOT-PROVIDED", "connectorSpecific": { "skippedRows": "0", "rowCount": "32622", "childId": [ "/qa-s3-storage/test-level1/split container/part-00000-d91360fd-0995-4af2-9998-39454c778297-c000.parquet", "/qa-s3-storage/test-level1/split container/part-00002-d91360fd-0995-4af2-9998-39454c778297-c000.parquet", "/qa-s3-storage/test-level1/split container/part-00001-d91360fd-0995-4af2-9998-39454c778297-c000.parquet" ] }, "title": "split container", "url": "/qa-s3-storage/test-level1/split container/", "samples": [{ "AST": null, "AC": null, "HomeTeam": "West Ham", "AwayTeam": "Wimbledon", "FTR": "A", "Referee": null, "AF": null, "HTR": null, "HST": null, "HTHG": null, "HR": null, "HS": null, "FTHG": "0", "DateTime": "745286400000000", "HY": null, "AR": null, "AS": null, "HTAG": null, "FTAG": "2", "AY": null, "HC": null, "Season": "1993-94", "HF": null }, { "AST": null, "AC": null, "HomeTeam": "Chelsea", "AwayTeam": "QPR", "FTR": "H", "Referee": null, "AF": null, "HTR": null, "HST": null, "HTHG": null, "HR": null, "HS": null, "FTHG": "2", "DateTime": "746236800000000", "HY": null, "AR": null, "AS": null, "HTAG": null, "FTAG": "0", "AY": null, "HC": null, "Season": "1993-94", "HF": null } ], "displayurl": "/qa-s3-storage/test-level1/split container/", "crawlStart": "2022-06-07T19:58:20Z", "ingestionEnd": "2022-06-07T19:58:54Z", "submitTime": "2022-06-07T19:58:55+0000", "ingestionStart": "2022-06-07T19:58:50Z", "dataProfile": { "columns": [{ "technical_tags": "OPTIONAL", "nullCount": "0", "column_type": "STRING", "columnName": "AwayTeam", "uniqueCount": "50" }, { "technical_tags": "OPTIONAL", "nullCount": "8472", 
"column_type": "STRING", "columnName": "Referee", "uniqueCount": "154" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "33.0", "meanValue": "11.41498260725533", "nullCount": "8472", "column_type": "INT32", "stdDev": "3.785881246274845", "columnName": "HF", "uniqueCount": "30" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "24.0", "meanValue": "6.159392082159955", "nullCount": "8472", "column_type": "INT32", "stdDev": "3.3342743104428822", "columnName": "HST", "uniqueCount": "24" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "9.0", "meanValue": "1.7452791121417932", "nullCount": "8472", "column_type": "INT32", "stdDev": "1.2683279579638864", "columnName": "AY", "uniqueCount": "10" }, { "technical_tags": "OPTIONAL", "nullCount": "0", "column_type": "STRING", "columnName": "Season", "uniqueCount": "29" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "9.0", "meanValue": "1.5191146264447137", "nullCount": "0", "column_type": "INT32", "stdDev": "1.301419064816484", "columnName": "FTHG", "uniqueCount": "10" }, { "technical_tags": "OPTIONAL", "nullCount": "2772", "column_type": "STRING", "columnName": "HTR", "uniqueCount": "3" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "20.0", "meanValue": "4.784661255590524", "nullCount": "8472", "column_type": "INT32", "stdDev": "2.7948464257686143", "columnName": "AST", "uniqueCount": "21" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "19.0", "meanValue": "4.78503395726357", "nullCount": "8472", "column_type": "INT32", "stdDev": "2.7298022947989997", "columnName": "AC", "uniqueCount": "20" }, { "technical_tags": "OPTIONAL", "minValue": "1.0", "maxValue": "29.0", "meanValue": "11.915935067086311", "nullCount": "8472", "column_type": "INT32", "stdDev": "3.954860578157097", "columnName": "AF", "uniqueCount": "29" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "30.0", "meanValue": "10.61669703495112", 
"nullCount": "8472", "column_type": "INT32", "stdDev": "4.562810353472809", "columnName": "AS", "uniqueCount": "31" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "7.0", "meanValue": "1.408025509358946", "nullCount": "8472", "column_type": "INT32", "stdDev": "1.184736178155765", "columnName": "HY", "uniqueCount": "8" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "2.0", "meanValue": "0.09056650654298494", "nullCount": "8472", "column_type": "INT32", "stdDev": "0.2992883049306765", "columnName": "AR", "uniqueCount": "3" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "9.0", "meanValue": "1.1365726548130015", "nullCount": "0", "column_type": "INT32", "stdDev": "1.1315638850115657", "columnName": "FTAG", "uniqueCount": "10" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "5.0", "meanValue": "0.49772179040471365", "nullCount": "2772", "column_type": "INT32", "stdDev": "0.718303252925753", "columnName": "HTAG", "uniqueCount": "6" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "5.0", "meanValue": "0.684937014205302", "nullCount": "2772", "column_type": "INT32", "stdDev": "0.8356231684953207", "columnName": "HTHG", "uniqueCount": "6" }, { "technical_tags": "OPTIONAL", "nullCount": "0", "column_type": "STRING", "columnName": "FTR", "uniqueCount": "3" }, { "technical_tags": [ "OPTIONAL", "AdjustedToUTC", "MICROS" ], "column_type": "TIMESTAMP", "columnName": "DateTime" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "3.0", "meanValue": "0.06298658273977123", "nullCount": "8472", "column_type": "INT32", "stdDev": "0.25394533837073197", "columnName": "HR", "uniqueCount": "4" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "43.0", "meanValue": "13.500000000000043", "nullCount": "8472", "column_type": "INT32", "stdDev": "5.249926294181544", "columnName": "HS", "uniqueCount": "40" }, { "technical_tags": "OPTIONAL", "nullCount": "0", "column_type": 
"STRING", "columnName": "HomeTeam", "uniqueCount": "50" }, { "technical_tags": "OPTIONAL", "minValue": "0.0", "maxValue": "20.0", "meanValue": "6.0981033625973495", "nullCount": "8472", "column_type": "INT32", "stdDev": "3.097059103646822", "columnName": "HC", "uniqueCount": "21" } ] } }, "name": "data-container" }
The Job Summarizer Executor can optionally be configured with a Groovy script that filters which rows will be processed.
Example:
// This script must return a boolean.
// The references of the job, doc, component, row and table objects are available.
// Javadoc references
// Row (row) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Row.html
// Table (table) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Table.html
row.getBoolean("sensitive") == true