...
Code Block | ||||||||
---|---|---|---|---|---|---|---|---|
| ||||||||
{ "config": { "apiPort": 8080, "libraryJars": ["./lib"], "actionManagerConfig": { "actions": "actions-provider:actiongroups" }, "actionProviders": [ { "name": "constant", "display": "Constant", "type": "Constant", "fetchesData": false }, { "name": "Human", "display": "Human", "type": "Elasticsearch", "fetchesData": true, "transformation": "./transformations/es-transformation.js", "hosts": [ { "host": "localhost", "port": 9200, "schema": "http" } ], "index": "wikidata", "query": { "bool": { "should": [ { "term": { "_id": { "value": "{{human._id}}", "boost": 9999 } } }, { "match_phrase": { "label": { "query": "{{human}}", "boost": 1 } } }, { "match_phrase": { "aliases": { "query": "{{human}}", "boost": 2 } } }, { "query_string": { "query": "{{human.match}}" } } ] } } }, { "name": "geography", "display": "Geography", "type": "Elasticsearch", "fetchesData": true, "transformation": "./transformations/es-transformation.js", "hosts": [ { "host": "localhost", "port": 9200, "schema": "http" } ], "index": "wikidata", "query": { "bool": { "should": [ { "term": { "_id": { "value": "{{geography._id}}", "boost": 9999 } } }, { "match_phrase": { "label": { "query": "{{geography}}", "boost": 1 } } }, { "match_phrase": { "aliases": { "query": "{{geography}}", "boost": 2 } } }, { "query_string": { "query": "{{geography.match}}" } } ] } } }, { "name": "currency", "display": "Currency", "type": "Openexchange", "transformation": "./transformations/openexchange-transformation.js", "fetchesData": false, "appId": "6cfdc0df634a4796b1a40748cdbe6006" } ], "providers": [ { "name": "actions-provider", "type": "MongoDB", "uri": "mongodb://localhost:27017", "database": "sagaDB", "aggregation": [...], "transactionCollection": "transactions", "fetchTimestamp": [...] }, { "name": "entity-provider", "type": "MongoDB", "uri": "mongodb://localhost:27017", "database": "sagaDB", "aggregation": [...], "transactionCollection": "transactions", "fetchTimestamp": [...] 
}, { "name": "patterns-provider", "type": "MongoDB", "uri": "mongodb://localhost:27017", "database": "sagaDB", "aggregation": [...], "transactionCollection": "transactions", "fetchTimestamp": [...] }, { "name": "regex-provider", "type": "MongoDB", "uri": "mongodb://localhost:27017", "database": "sagaDB", "aggregation": [...], "transactionCollection": "transactions", "fetchTimestamp": [...] }, { "name": "equipment-provider",
...
...
"type": "MongoDB",
"uri": "mongodb://localhost:27017",
"database": "sagaDB",
"aggregation": [...],
"transactionCollection": "transactions",
"fetchTimestamp": [...]
},
{
"name": "unit-provider",
"type": "MongoDB",
"uri": "mongodb://localhost:27017",
"database": "sagaDB",
"aggregation": [...],
"transactionCollection": "transactions",
"fetchTimestamp": [...]
}
]
}
} |
...
The other part of the configuration is the pipelines.json file, which holds the pipeline definitions. Currently there are 2 pipelines for Saga Parser:
Besides that, the pipelines are exactly as described in Configure Pipelines & Resource Providers.
Code Block | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|
| ||||||||||
{
"config": {
"pipelineConfiguration": {
"process": {
"reader": {
"type": "SimpleReader",
"splitRegex": "[\r\n]+"
},
"stages": [
{
"type": "SentenceBreakerStage"
},
{
"type": "WhitespaceTokenizerStage",
"requiredFlags": [
"SENTENCE"
]
},
{
"type": "CharacterSplitter"
},
{
"type": "CharChangeSplitter",
"case": true,
"number": true,
"punctuation": true
},
{
"type": "LemmatizeStage",
"exclude": [
"ob",
"syn",
"alt"
],
"skipFlags": [
"ALL_PUNCTUATION"
]
},
{
"type": "CaseAnalysisStage"
},
{
"type": "NumberRecognizer"
},
{
"type": "StopWordsStage"
},
{
"type": "RegexPatternStage",
"patterns": "regex-provider:patterns",
"caseInsensitive": true,
"boundaryFlags": [
"SENTENCE_SPLIT"
]
},
{
"type": "DictionaryTaggerStage",
"dictionary": "entity-provider:entities",
"skipFlags": [
"SKIP"
],
"boundaryFlags": [
"SENTENCE_SPLIT"
],
"requiredFlags": [
"TOKEN",
"ALL_LOWER_CASE"
],
"ignoreTags": [
"root"
],
"debug": true
},
{
"type": "AdvancedPattern",
"skipFlags": [
"SKIP"
],
"patterns": "patterns-provider:patterns",
"debug": true
}
]
},
"ml": {
"reader": {
"type": "SimpleReader",
"splitRegex": "[\r\n]+"
},
"stages": [
{
"type": "QuotationBreakerStage",
"singleQuotes": true
},
{
"type": "SentenceBreakerStage",
"skipFlags": [
"PROCESSED"
]
},
{
"type": "WhitespaceTokenizerStage",
"requiredFlags": [
"SENTENCE"
]
},
{
"type": "CharChangeSplitter",
"case": false,
"number": true,
"punctuation": true
},
{
"type": "CaseAnalysisStage"
},
{
"type": "StopWordsStage"
},
{
"type": "RegexPatternStage",
"patterns": "regex-provider:patterns",
"caseInsensitive": true,
"boundaryFlags": [
"SENTENCE_SPLIT"
]
},
{
"type": "DictionaryTaggerStage",
"dictionary": "unit-provider:entities",
"skipFlags": [
"SKIP"
],
"boundaryFlags": [
"SENTENCE_SPLIT"
],
"requiredFlags": [
"TOKEN",
"ALL_LOWER_CASE"
],
"ignoreTags": [
"root"
]
},
{
"type": "AdvancedPattern",
"skipFlags": [
"SKIP"
],
"patterns": "patterns-provider:patterns"
},
{
"type": "DictionaryTaggerStage",
"dictionary": "equipment-provider:entities",
"skipFlags": [
"SKIP"
],
"boundaryFlags": [
"SENTENCE_SPLIT"
],
"requiredFlags": [
"TOKEN",
"ALL_LOWER_CASE"
],
"ignoreTags": [
"root"
]
}
]
}
}
}
} |
Note |
---|
The default pipeline configuration can change over time as new stages are created and the code is improved. |
...