Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.


Easy Heading Free
navigationTitleOn this Page
wrapNavigationTexttrue
navigationExpandOptionexpand-all-by-default

Introduction


The Job Summarizer Executor is able to process the table data contained in an Aspire job and fetch the associated rows from an Elasticsearch index. Each extracted row will be processed by the summarizers attached to the job.

Job-based summarization

The Job Summarizer Executor summarizes data based on the table structure contained in a job.

Example of supported table structure:

Code Block
themeRDark
titleTable Structure
{
  "container": {
    "repItemType": "aspire/folder",
    "seed": {
      "description": "s3",
      "id": "a8c0c88a-d3b4-42fb-b27d-57137ab85154",
      "type": "s3",
      "properties": {
        "tag1": "value1",
        "seed": "/qa-s3-storage/test-level1/split container/",
        "processSplitFiles": "true",
        "usePrefixesForSplitCheck": "true",
        "splitCheckPrefix": "part-"
      },
      "tags": [
        "darwin"
      ]
    },
    "isContainer": "TYPE-NOT-PROVIDED",
    "connectorSpecific": {
      "skippedRows": "0",
      "rowCount": "32622",
      "childId": [
        "/qa-s3-storage/test-level1/split container/part-00000-d91360fd-0995-4af2-9998-39454c778297-c000.parquet",
        "/qa-s3-storage/test-level1/split container/part-00002-d91360fd-0995-4af2-9998-39454c778297-c000.parquet",
        "/qa-s3-storage/test-level1/split container/part-00001-d91360fd-0995-4af2-9998-39454c778297-c000.parquet"
      ]
    },
    "title": "split container",
    "url": "/qa-s3-storage/test-level1/split container/",
    "samples": [{
        "Row1": "text",
        "Row2": null,
        "Row3": 5,
        "Row4": "text",
        "Row5": "745286400000000"
      }
    ],
    "displayurl": "/qa-s3-storage/test-level1/split container/",
    "crawlStart": "2022-06-07T19:58:20Z",
    "ingestionEnd": "2022-06-07T19:58:54Z",
    "submitTime": "2022-06-07T19:58:55+0000",
    "ingestionStart": "2022-06-07T19:58:50Z",
    "dataProfile": {
      "columns": [{
          "technical_tags": "OPTIONAL",
          "nullCount": "0",
          "column_type": "STRING",
          "columnName": "Row1",
          "uniqueCount": "50"
        }, {
          "technical_tags": "OPTIONAL",
          "nullCount": "8472",
          "column_type": "STRING",
          "columnName": "Row2",
          "uniqueCount": "154"
        }, {
          "technical_tags": "OPTIONAL",
          "minValue": "0.0",
          "maxValue": "33.0",
          "meanValue": "11.41498260725533",
          "nullCount": "8472",
          "column_type": "INT32",
          "stdDev": "3.785881246274845",
          "columnName": "Row3",
          "uniqueCount": "30"
        }, {
          "technical_tags": "OPTIONAL",
          "nullCount": "0",
          "column_type": "STRING",
          "columnName": "Row4",
          "uniqueCount": "50"
        }, {
          "technical_tags": [
            "OPTIONAL",
            "AdjustedToUTC",
            "MICROS"
          ],
          "column_type": "TIMESTAMP",
          "columnName": "Row5"
        }
      ]
    }
  },
  "name": "data-container"
}

Summarize a table based on the structure contained in the job.

Rows Filtering

The Job Summarizer Executor can be configured with a Groovy script that filters which rows will be processed.

Example:

Code Block
themeRDark
titleRow Filter
// This script must return a boolean.
// The references of the job, doc, component, row and table objects are available.
// Javadoc references 
// Row (row) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Row.html
// Table (table) - http://{manager}/javadocs/com/accenture/aspire/services/summarization/Table.html
row.getBoolean("sensitive") == true