Field | Required | Default | Multiple | Notes | Example | ||||
---|---|---|---|---|---|---|---|---|---|
properties | Yes | - | No | Configuration object | |||||
sourceField / setTempFile | Yes | false / true | No | Selects how to enter the Elasticsearch URL. True: server and port are specified. False: full URL is specified. The path to the folder where the temporary files were processed. Furthermore, you can set a System Variable "user.dir" with the path | true/false | ||||
detectedLanguageField | Yes | "detected_language" | No | The field where the main detected language will be written. | "detected_language" | ||||
detectionType | Yes | "All" | No | Select the type of detection to be used. | "All" | ||||
languagePool | No | "languagePool" | No | If detectionType "All" is selected, the value must be "languagePool" | "languagePool" | ||||
tempFilesPath | No | "${aspire.home}/temp" | No | The path to the folder where the temporary files are processed | "${aspire.home}/temp" | ||||
maxFileSize | Yes | false | No | Maximum characters for the file loaded into memory to be parsed. (False will be set to unlimited) | true/false | ||||
characters | No | 1000000 | No | The maximum number of characters to be processed; limits memory consumption | 1000000 | ||||
addParentInfo | Yes | false | No | Check if you want to add extra info from the parent to every slide of the file | true/false | ||||
extractConfidenceValues | No | false | No | Check if confidence statistics extraction is required. | false | ||||
detectionOutput | No | "detection_output" | No | Only required if "extractConfidenceValues" is enabled. The field where all detected languages and their statistics will be written. | "detection_output" | ||||
debug | No | false | No | Check to enable debug messages. | false | ||||
limitTextSize | No | false | No | Limit Source Field text size | false | ||||
textSizeLimit | No | 128 | No | Only required if "limitTextSize" is enabled. Maximum size for Source Field's text. | 128 | ||||
includeISOCodes | No | false | No | If enabled, ISO codes will be included in the document. | false | ||||
includeISO639_1 | No | false | No | Check to include the ISO 639-1 codes in the document. | false | ||||
parentField | No | "datamodel" | No | Specify the name of the field from the parent job to add as extra info to every slide | "datamodel" | ||||
threadTimeout | Yes | 15 | No | Maximum time (in minutes) for the processing thread to wait for the parsing result | 15 | ||||
subJobSleep | Yes | 60000 | No | Time to wait (in milliseconds) until all sub jobs are done | 60000 | ||||
includeISO639_3 | No | false | No | Check to include the ISO 639-3 codes in the document. | false | ||||
debug | No | false | No | Set to true to enable debug messages. | true/false | ||||
...
Code Block | ||||
---|---|---|---|---|
| ||||
# All languages
{ "type": "application", "_type": "application", "description": "sample-ld-2", "config": "com.accenture.aspire:app-language-detector", "appType": "language-detector", "appName": "Language Detector", "properties": { "sourceField": "content", "detectedLanguageField": "detected_language", "detectionType": "All", "languagePool": "languagePool", "extractConfidenceValues": false, "detectionOutput": "detection_output", "debug": false, "limitTextSize": false, "textSizeLimit": 128, "includeISOCodes": false, "includeISO639_1": false, "includeISO639_3": false } }
# Spoken Language
{ "type": "application", "_type": "application", "description": "sample-ld-3", "config": "com.accenture.aspire:app-language-detector", "appType": "language-detector", "appName": "Language Detector", "properties": { "sourceField": "content", "detectedLanguageField": "detected_language", "detectionType": "Spoken", "languagePool": "languagePool", "extractConfidenceValues": false, "detectionOutput": "detection_output", "debug": false, "limitTextSize": false, "textSizeLimit": 128, "includeISOCodes": false, "includeISO639_1": false, "includeISO639_3": false } }
# Cyrillic Language
{ "type": "application", "_type": "application", "description": "sample-ld-3", "config": "com.accenture.aspire:app-language-detector", "appType": "language-detector", "appName": "Language Detector", "properties": { "sourceField": "content", "detectedLanguageField": "detected_language", "detectionType": "Cyrillic", "languagePool": "languagePool", "extractConfidenceValues": false, "detectionOutput": "detection_output", "debug": false, "limitTextSize": false, "textSizeLimit": 128, "includeISOCodes": false, "includeISO639_1": false, "includeISO639_3": false } }
# Specific
{ "type": "application", "_type": "application", "description": "sample-ld-1", "config": "com.accenture.aspire:app-language-detector", "appType": "language-detector", "appName": "Language Detector", "properties": { "sourceField": "content", "detectedLanguageField": "detected_language", "detectionType": "Specific", "languagePool": [ "Afrikaans", "Albanian" ], "extractConfidenceValues": false, "detectionOutput": "detection_output", "debug": false, "limitTextSize": false, "textSizeLimit": 128, "includeISOCodes": false, "includeISO639_1": false, "includeISO639_3": false } }
# Except
{ "type": "application", "_type": "application", "description": "sample-ld-1", "config": "com.accenture.aspire:app-language-detector", "appType": "language-detector", "appName": "Language Detector", "properties": { "sourceField": "content", "detectedLanguageField": "detected_language", "detectionType": "Except", "languagePool": [ "Afrikaans", "Albanian" ], "extractConfidenceValues": false, "detectionOutput": "detection_output", "debug": false, "limitTextSize": false, "textSizeLimit": 128, "includeISOCodes": false, "includeISO639_1": false, "includeISO639_3": false } }
# Slide Extractor
{ "type": "application", "_type": "application", "description": "sample-slide-extractor", "config": "com.accenture.aspire:app-slide-extractor", "appType": "slide-extractor", "appName": "Slide Extractor", "properties": { "setTempFile": true, "tempFilesPath": "${aspire.home}/temp", "maxFileSize": true, "characters": "1000000", "addParentInfo": true, "parentField": "datamodel", "threadTimeout": 15, "subJobSleep": 60000, "debug": false } } |