You are viewing an old version of this page. View the current version.

Compare with Current View Page History

« Previous Version 3 Next »


Introduction


The Job Summarizer Executor is able to process the table data contained in an Aspire job and fetch the associated rows from an Elasticsearch index. Each extracted row will be processed by the summarizers attached to the job.

Job based summarization

The Job Summarizer Executor summarizes data based on the table structure contained in a job.

Example of supported table structure:

Table Structure
{
  "container": {
    "repItemType": "aspire/folder",
    "seed": {
      "description": "s3",
      "id": "a8c0c88a-d3b4-42fb-b27d-57137ab85154",
      "type": "s3",
      "properties": {
        "tag1": "value1",
        "seed": "/qa-s3-storage/test-level1/split container/",
        "processSplitFiles": "true",
        "usePrefixesForSplitCheck": "true",
        "splitCheckPrefix": "part-"
      },
      "tags": [
        "darwin"
      ]
    },
    "isContainer": "TYPE-NOT-PROVIDED",
    "connectorSpecific": {
      "skippedRows": "0",
      "rowCount": "32622",
      "childId": [
        "/qa-s3-storage/test-level1/split container/part-00000-d91360fd-0995-4af2-9998-39454c778297-c000.parquet",
        "/qa-s3-storage/test-level1/split container/part-00002-d91360fd-0995-4af2-9998-39454c778297-c000.parquet",
        "/qa-s3-storage/test-level1/split container/part-00001-d91360fd-0995-4af2-9998-39454c778297-c000.parquet"
      ]
    },
    "title": "split container",
    "url": "/qa-s3-storage/test-level1/split container/",
    "samples": [{
        "AST": null,
        "AC": null,
        "HomeTeam": "West Ham",
        "AwayTeam": "Wimbledon",
        "FTR": "A",
        "Referee": null,
        "AF": null,
        "HTR": null,
        "HST": null,
        "HTHG": null,
        "HR": null,
        "HS": null,
        "FTHG": "0",
        "DateTime": "745286400000000",
        "HY": null,
        "AR": null,
        "AS": null,
        "HTAG": null,
        "FTAG": "2",
        "AY": null,
        "HC": null,
        "Season": "1993-94",
        "HF": null
      }, {
        "AST": null,
        "AC": null,
        "HomeTeam": "Chelsea",
        "AwayTeam": "QPR",
        "FTR": "H",
        "Referee": null,
        "AF": null,
        "HTR": null,
        "HST": null,
        "HTHG": null,
        "HR": null,
        "HS": null,
        "FTHG": "2",
        "DateTime": "746236800000000",
        "HY": null,
        "AR": null,
        "AS": null,
        "HTAG": null,
        "FTAG": "0",
        "AY": null,
        "HC": null,
        "Season": "1993-94",
        "HF": null
      }
    ],
    "displayurl": "/qa-s3-storage/test-level1/split container/",
    "crawlStart": "2022-06-07T19:58:20Z",
    "ingestionEnd": "2022-06-07T19:58:54Z",
    "submitTime": "2022-06-07T19:58:55+0000",
    "ingestionStart": "2022-06-07T19:58:50Z",
    "dataProfile": {
      "columns": [{
          "technical_tags": "OPTIONAL",
          "nullCount": "0",
          "column_type": "STRING",
          "columnName": "AwayTeam",
          "uniqueCount": "50"
        }, {
          "technical_tags": "OPTIONAL",
          "nullCount": "8472",
          "column_type": "STRING",
          "columnName": "Referee",
          "uniqueCount": "154"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "33.0",
          "meanValue": "11.41498260725533",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "3.785881246274845",
          "columnName": "HF",
          "uniqueCount": "30"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "24.0",
          "meanValue": "6.159392082159955",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "3.3342743104428822",
          "columnName": "HST",
          "uniqueCount": "24"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "9.0",
          "meanValue": "1.7452791121417932",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "1.2683279579638864",
          "columnName": "AY",
          "uniqueCount": "10"
        }, {
          "technical_tags": "OPTIONAL",
          "nullCount": "0",
          "column_type": "STRING",
          "columnName": "Season",
          "uniqueCount": "29"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "9.0",
          "meanValue": "1.5191146264447137",
          "nullCount": "0",
          "column_type": "INT32",
          "stdDev": "1.301419064816484",
          "columnName": "FTHG",
          "uniqueCount": "10"
        }, {
          "technical_tags": "OPTIONAL",
          "nullCount": "2772",
          "column_type": "STRING",
          "columnName": "HTR",
          "uniqueCount": "3"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "20.0",
          "meanValue": "4.784661255590524",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "2.7948464257686143",
          "columnName": "AST",
          "uniqueCount": "21"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "19.0",
          "meanValue": "4.78503395726357",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "2.7298022947989997",
          "columnName": "AC",
          "uniqueCount": "20"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "1.0",
          "maxValue": "29.0",
          "meanValue": "11.915935067086311",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "3.954860578157097",
          "columnName": "AF",
          "uniqueCount": "29"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "30.0",
          "meanValue": "10.61669703495112",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "4.562810353472809",
          "columnName": "AS",
          "uniqueCount": "31"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "7.0",
          "meanValue": "1.408025509358946",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "1.184736178155765",
          "columnName": "HY",
          "uniqueCount": "8"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "2.0",
          "meanValue": "0.09056650654298494",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "0.2992883049306765",
          "columnName": "AR",
          "uniqueCount": "3"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "9.0",
          "meanValue": "1.1365726548130015",
          "nullCount": "0",
          "column_type": "INT32",
          "stdDev": "1.1315638850115657",
          "columnName": "FTAG",
          "uniqueCount": "10"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "5.0",
          "meanValue": "0.49772179040471365",
          "nullCount": "2772",
          "column_type": "INT32",
          "stdDev": "0.718303252925753",
          "columnName": "HTAG",
          "uniqueCount": "6"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "5.0",
          "meanValue": "0.684937014205302",
          "nullCount": "2772",
          "column_type": "INT32",
          "stdDev": "0.8356231684953207",
          "columnName": "HTHG",
          "uniqueCount": "6"
        }, {
          "technical_tags": "OPTIONAL",
          "nullCount": "0",
          "column_type": "STRING",
          "columnName": "FTR",
          "uniqueCount": "3"
        }, {
          "technical_tags": [
            "OPTIONAL",
            "AdjustedToUTC",
            "MICROS"
          ],
          "column_type": "TIMESTAMP",
          "columnName": "DateTime"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "3.0",
          "meanValue": "0.06298658273977123",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "0.25394533837073197",
          "columnName": "HR",
          "uniqueCount": "4"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "43.0",
          "meanValue": "13.500000000000043",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "5.249926294181544",
          "columnName": "HS",
          "uniqueCount": "40"
        }, {
          "technical_tags": "OPTIONAL",
          "nullCount": "0",
          "column_type": "STRING",
          "columnName": "HomeTeam",
          "uniqueCount": "50"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "20.0",
          "meanValue": "6.0981033625973495",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "3.097059103646822",
          "columnName": "HC",
          "uniqueCount": "21"
        }
      ]
    }
  },
  "name": "data-container"
}

Summarize a table based on the structure contained in the job.

Rows Filtering

The Job Summarizer Executor has the option to configure a groovy script to filter which rows will be processed.

Example:

Row Filter
// This script must return a boolean.
// The references of the job, doc, component, row and table objects are available.
// Javadoc references 
// Row (row) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Row.html
// Table (table) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Table.html
row.getBoolean("sensitive") == true
  • No labels