When we refer to the Saga Solution, we mean the combination of the Saga UI, Saga Server, and Saga Library, among other utilities created to accelerate the creation of an NLP base. In the diagram below you can see how each part fits together and how the App and Data Storage (which Saga is independent of) interact with the Saga Solution.
Table of Contents | ||
---|---|---|
|
Starting the server is quite easy and can be done in one of these 2 ways:
This execution uses the configuration embedded in the server, which expects a MongoDB database called "sagaDB" with the collections and formats expected by the Saga Admin UI.
Code Block | ||||
---|---|---|---|---|
| ||||
java -jar -Dfile.encoding=UTF-8 saga-server-parser-0.0.1-SNAPSHOT.jar |
This execution requires a config.json and a pipelines.json file to be passed as parameters of the application. We recommend overwriting the files within the jar of the server, in order to use and preserve the Saga Admin UI format of the database.
Code Block | ||||
---|---|---|---|---|
| ||||
java -jar -Dfile.encoding=UTF-8 saga-server-parser-0.0.1-SNAPSHOT.jar config.json pipelines.json |
Below is a partial example of the Server Parser config. To save space in this example, we omitted the aggregation and fetchTimestamp values from the providers (shown as [...]):
Code Block | ||||||||
---|---|---|---|---|---|---|---|---|
| ||||||||
{
"config": {
"apiPort": 8080,
"libraryJars": ["./lib"],
"actionManagerConfig": {
"actions": "actions-provider:actiongroups"
},
"actionProviders": [
{
"name": "constant",
"display": "Constant",
"type": "Constant",
"fetchesData": false
},
{
"name": "Human",
"display": "Human",
"type": "Elasticsearch",
"fetchesData": true,
"transformation": "./transformations/es-transformation.js",
"hosts": [
{
"host": "localhost",
"port": 9200,
"schema": "http"
}
],
"index": "wikidata",
"query": {
"bool": {
"should": [
{
"term": {
"_id": {
"value": "{{human._id}}",
"boost": 9999
}
}
},
{
"match_phrase": {
"label": {
"query": "{{human}}",
"boost": 1
}
}
},
{
"match_phrase": {
"aliases": {
"query": "{{human}}",
"boost": 2
}
}
},
{
"query_string": {
"query": "{{human.match}}"
}
}
]
}
}
},
{
"name": "geography",
"display": "Geography",
"type": "Elasticsearch",
"fetchesData": true,
"transformation": "./transformations/es-transformation.js",
"hosts": [
{
"host": "localhost",
"port": 9200,
"schema": "http"
}
],
"index": "wikidata",
"query": {
"bool": {
"should": [
{
"term": {
"_id": {
"value": "{{geography._id}}",
"boost": 9999
}
}
},
{
"match_phrase": {
"label": {
"query": "{{geography}}",
"boost": 1
}
}
},
{
"match_phrase": {
"aliases": {
"query": "{{geography}}",
"boost": 2
}
}
},
{
"query_string": {
"query": "{{geography.match}}"
}
}
]
}
}
},
{
"name": "currency",
"display": "Currency",
"type": "Openexchange",
"transformation": "./transformations/openexchange-transformation.js",
"fetchesData": false,
"appId": "6cfdc0df634a4796b1a40748cdbe6006"
}
],
"providers": [
{
"name": "actions-provider",
"type": "MongoDB",
"uri": "mongodb://localhost:27017",
"database": "sagaDB",
"aggregation": [...],
"transactionCollection": "transactions",
"fetchTimestamp": [...]
},
{
"name": "entity-provider",
"type": "MongoDB",
"uri": "mongodb://localhost:27017",
"database": "sagaDB",
"aggregation": [...],
"transactionCollection": "transactions",
"fetchTimestamp": [...]
},
{
"name": "patterns-provider",
"type": "MongoDB",
"uri": "mongodb://localhost:27017",
"database": "sagaDB",
"aggregation": [...],
"transactionCollection": "transactions",
"fetchTimestamp": [...]
},
{
"name": "regex-provider",
"type": "MongoDB",
"uri": "mongodb://localhost:27017",
"database": "sagaDB",
"aggregation": [...],
"transactionCollection": "transactions",
"fetchTimestamp": [...]
},
{
"name": "equipment-provider",
"type": "MongoDB",
"uri": "mongodb://localhost:27017",
"database": "sagaDB",
"aggregation": [...],
"transactionCollection": "transactions",
"fetchTimestamp": [...]
},
{
"name": "unit-provider",
"type": "MongoDB",
"uri": "mongodb://localhost:27017",
"database": "sagaDB",
"aggregation": [...],
"transactionCollection": "transactions",
"fetchTimestamp": [...]
}
]
}
} |
The other file that is part of the configuration is pipelines.json, which holds the pipeline definitions. Currently we have 2 pipelines for the Saga Parser ("process" and "ml" in the example below):
Besides that, the pipelines are exactly as described in Configure Pipelines & Resource Providers.
Code Block | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|
| ||||||||||
{
"config": {
"pipelineConfiguration": {
"process": {
"reader": {
"type": "SimpleReader",
"splitRegex": "[\r\n]+"
},
"stages": [
{
"type": "SentenceBreakerStage"
},
{
"type": "WhitespaceTokenizerStage",
"requiredFlags": [
"SENTENCE"
]
},
{
"type": "CharacterSplitter"
},
{
"type": "CharChangeSplitter",
"case": true,
"number": true,
"punctuation": true
},
{
"type": "LemmatizeStage",
"exclude": [
"ob",
"syn",
"alt"
],
"skipFlags": [
"ALL_PUNCTUATION"
]
},
{
"type": "CaseAnalysisStage"
},
{
"type": "NumberRecognizer"
},
{
"type": "StopWordsStage"
},
{
"type": "RegexPatternStage",
"patterns": "regex-provider:patterns",
"caseInsensitive": true,
"boundaryFlags": [
"SENTENCE_SPLIT"
]
},
{
"type": "DictionaryTaggerStage",
"dictionary": "entity-provider:entities",
"skipFlags": [
"SKIP"
],
"boundaryFlags": [
"SENTENCE_SPLIT"
],
"requiredFlags": [
"TOKEN",
"ALL_LOWER_CASE"
],
"ignoreTags": [
"root"
],
"debug": true
},
{
"type": "AdvancedPattern",
"skipFlags": [
"SKIP"
],
"patterns": "patterns-provider:patterns",
"debug": true
}
]
},
"ml": {
"reader": {
"type": "SimpleReader",
"splitRegex": "[\r\n]+"
},
"stages": [
{
"type": "QuotationBreakerStage",
"singleQuotes": true
},
{
"type": "SentenceBreakerStage",
"skipFlags": [
"PROCESSED"
]
},
{
"type": "WhitespaceTokenizerStage",
"requiredFlags": [
"SENTENCE"
]
},
{
"type": "CharChangeSplitter",
"case": false,
"number": true,
"punctuation": true
},
{
"type": "CaseAnalysisStage"
},
{
"type": "StopWordsStage"
},
{
"type": "RegexPatternStage",
"patterns": "regex-provider:patterns",
"caseInsensitive": true,
"boundaryFlags": [
"SENTENCE_SPLIT"
]
},
{
"type": "DictionaryTaggerStage",
"dictionary": "unit-provider:entities",
"skipFlags": [
"SKIP"
],
"boundaryFlags": [
"SENTENCE_SPLIT"
],
"requiredFlags": [
"TOKEN",
"ALL_LOWER_CASE"
],
"ignoreTags": [
"root"
]
},
{
"type": "AdvancedPattern",
"skipFlags": [
"SKIP"
],
"patterns": "patterns-provider:patterns"
},
{
"type": "DictionaryTaggerStage",
"dictionary": "equipment-provider:entities",
"skipFlags": [
"SKIP"
],
"boundaryFlags": [
"SENTENCE_SPLIT"
],
"requiredFlags": [
"TOKEN",
"ALL_LOWER_CASE"
],
"ignoreTags": [
"root"
]
}
]
}
}
}
} |