The following authentication types are supported for crawled repositories:
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
type | yes | - | no | The value must be "rest-api". | "rest-api" |
description | yes | - | no | Name of the credential object. | "My REST Credential" |
properties | yes | - | no | Configuration object | |
type | yes | - | no | Authentication type: basic, apiToken, bearer, azure, none. | basic |
type: basic | |||||
loginAccount | yes | - | no | Username. | "admin" |
password | yes | - | no | Password (can be encrypted in Aspire fashion). | "adminPassword" |
type: apiToken | |||||
headerName | yes | - | no | The name of HTTP header field to be sent with a request. | "tokenName1" |
headerValue | yes | - | no | The value of the "headerName" field. | "tokenValue1" |
type: bearer | |||||
query | yes | - | no | bearer query: JSON object representing the query to be sent for getting the accessToken | |
urlTemplate | yes | - | no | The context path of the URL. | "/login" |
loginAccount | yes | - | no | Username. Used as a value for ${loginAccount} query body field. | "admin" |
password | yes | - | no | Password. Used as a value for ${password} query body field. | "adminPassword" |
method | yes | - | no | HTTP method. Must be POST in this version. | "POST" |
body | yes | - | no | The query body. The ${loginAccount} and ${password} placeholders are expected to be used as part of the body. | "{\"username\" : \"${loginAccount}\",\"password\" : \"${password}\"}" |
resultField | yes | - | no | The field in the response with the access token. | "accessToken" |
isFormBody | yes | false | no | Select if the request uses a Form body instead of a JSON body. | false |
setExpiresField | yes | false | no | Select if you want to specify the expires field. | false |
expiresType | yes | at | no | Select the type of token expiration: "at", "in". | "at" |
expiresAtField | no | - | no | The name of the field of the generated token (Only if expiresType is "at"). | "token" |
expiresAtFormat | no | yyyy-MM-dd'T'HH:mm:ss.SSSXXX | no | The date format of the generated token (Only if expiresType is "at"). | "yyyy-MM-dd'T'HH:mm:ss.SSSXXX" |
expiresInTime | no | false | no | Select if you want to specify the expiration of the token in a specific time (Only if expiresType is "in"). | false |
expiresInField | no | - | no | The name of the field of the generated token (Only if expiresType is "in"). | "token" |
timeType | no | - | no | The unit of the timestamp: "millis", "seconds", "minutes", "hours" (Only if expiresType is "in" and expiresInTime is "false"). | "millis" |
expiresInTimeType | no | - | no | The unit of the timestamp: "millis", "seconds", "minutes", "hours" (Only if expiresType is "in" and expiresInTime is "true"). | "minutes" |
expiresInField | no | - | no | The amount of time you want to wait for the token expiration (Only if expiresType is "in" and expiresInTime is "true"). | "10" |
setHeader | yes | false | no | Select if you want to specify the Authorization header name | false |
headerName | yes | Authorization | no | Authorization header name | "Authorization" |
customHeaders | yes | empty | no | Additional Headers | [{"field": "Accept", "value": "application/json"}] |
type: azure | |||||
client_id | yes | - | no | The Application (client) ID that the Azure portal - App registrations page assigned to your app | "1234ab567-89cd0" |
client_secret | yes | - | no | The credentials that authenticate the application | "1234ab567-89cd0" |
scope | yes | - | no | A space-separated list of scopes, or permissions, that the app requires | "user.read%20openid%20profile%20offline_access" |
urlTemplate | yes | - | no | The directory tenant that you want to log the user into. This can be in GUID or friendly name format | "myTenant" |
{ "type": "rest-api", "description": "My credential", "properties": { "type": "bearer", "query": { "urlTemplate": "/login", "username": "admin", "password": "encrypted:xxxxx", "method": "POST", "body": "{\"username\": \"${username}\", \"password\": \"${password}\"}", "resultField": "accessToken", "isFormBody": false, "setExpiresField": true, "expiresType": "in", "expiresInTime": true, "expiresInTimeType": "minutes", "expiresInField": "10", "setHeader": true, "headerName": "X-Tableau-Auth", "customHeaders": [ { "field": "Accept", "value": "application/json" } ] } } }
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
type | Yes | - | No | The value must be "rest-api". | "rest-api" |
description | Yes | - | No | Name of the connection object. | "My REST Connection" |
throttlePolicy | No | - | No | Id of the throttle policy that applies to this connection object. | "6b235b333a1b" |
routingPolicies | No | [ ] | Yes | The ids of the routing policies that this connection will use. | ["17f75ce7d0c7", "d42780003b36"] |
deleteIncrementalPolicy | No | - | No | Id of the delete policy that applies to this connection object. | "6b235b333a1b" |
credential | Yes | - | No | Id of the credential | "6b235b333a1b" |
properties | Yes | - | No | Configuration object | |
baseUrl | Yes | - | No | Your rest service API url | "https://your-service/api/v2/" |
connectionTimeout | Yes | 10000 | No | The period of time (ms) after which the connection will be closed | 10000 |
socketTimeout | Yes | 10000 | No | The period of time (ms) after which the socket will be closed | 10000 |
maxRetries | Yes | 3 | No | The number of connection retries | 3 |
useThrottling | Yes | false | No | If you want to use connection Throttling | false |
throttling | Yes | throttling_false | No | Constant required if useThrottling is false | "throttling_false" |
throttlingConnectionRate | No | 500 | No | The number of calls that your API can receive within a fixed period of time (Required if useThrottling is true) | 500 |
throttlingRate | No | 5000 | No | The fixed period of time (in milliseconds) in which the number of API calls can be received (Required if useThrottling is true) | 5000 |
useProxy | Yes | false | No | If you want to configure a proxy | false |
proxy | Yes | proxy_false | No | Constant required if useProxy is false | "proxy_false" |
proxyProtocol | Yes | http | No | The protocol of the proxy connection | "http" |
proxyHost | Yes | - | No | Your proxy host | "myHostName" |
proxyPort | Yes | 3128 | No | Your proxy port | 3128 |
useProxyAuth | Yes | false | No | If you want to configure a proxy authentication | false |
proxyAuth | Yes | proxyAuth_false | No | Constant required if useProxyAuth is false | "proxyAuth_false" |
proxyUser | Yes | - | No | Proxy Username | "Administrator" |
proxyPass | Yes | - | No | Proxy Password | "adminPassword" |
trustAllCertificates | Yes | false | No | If selected, no HTTPS certificate validation will be done. | true |
{ "type": "rest-api", "description": "Rest conn 3", "credential": "0b6fd9c8-d722-4874-aca1-e57c6eff2089", "properties": { "baseUrl": "http://aspire_manager:50443/aspire/_api", "connectionTimeout": 10000, "socketTimeout": 10000, "maxRetries": 3, "useThrottling": false, "throttling": "throttling_false", "useProxy": false, "proxy": "proxy_false", "trustAllCertificates": false } }
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
id | Yes | - | No | Id of the connection to update | "d442adcab4b0" |
description | No | - | No | Name of the connection object. | "My REST Connection" |
throttlePolicy | No | - | No | Id of the throttle policy that applies to this connection object. | "b3a9-6b235b333a1b" |
routingPolicies | No | [ ] | Yes | The ids of the routing policies that this connection will use. | ["17f75ce7d0c7", "d42780003b36"] |
credential | No | - | No | Id of the credential | "6b235b333a1b" |
properties | No | - | No | Configuration object | |
(see create connection) |
{ "id": "89d6632a-a296-426c-adb0-d442adcab4b0", "description": "REST connection", "properties": { "baseUrl": "http://aspire_manager:50443/aspire/_api" } }
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
seed | Yes | - | No | N/A | "N/A" |
type | Yes | - | No | The value must be "rest-api". | "rest-api" |
description | Yes | - | No | Name of the seed object. | "My REST Seed" |
connector | Yes | - | No | The id of the connector to be used with this seed. The connector type must match the seed type. | "e3ca414b0d31" |
connection | Yes | - | No | The id of the connection to be used with this seed. The connection type must match the seed type. | "e4a663fe9ee6" |
workflows | No | [ ] | Yes | The ids of the workflows that will be executed for the documents crawled. | ["5696c3f0bda4"] |
throttlePolicy | No | - | No | Id of the throttle policy that applies to this seed object. | "6b235b333a1b" |
routingPolicies | No | [ ] | Yes | The ids of the routing policies that this seed will use. | ["17f75ce7d0c7", "d42780003b36"] |
deleteIncrementalPolicy | No | - | No | Id of the delete policy that applies to this seed object. | "6b235sd23423b" |
tags | No | [ ] | Yes | The tags of the seed. These can be used to filter the seed | ["tag1", "tag2"] |
properties | Yes | - | No | Configuration object | |
seed | Yes | - | No | N/A | "N/A" |
crawlRules | Yes | - | Yes | Crawl rules | |
conditionalScript | Yes | true | No | Groovy script that determines whether a given item should execute this set of queries. The following matches the root item: `item.getType().toString().equals('root')`. The following matches any extracted entity from a scan: `item.getType().toString().equals('entity')`. | "item.getType().toString().equals('root')" |
entityType | Yes | - | No | Entity type to match (required if conditionalScript is false) | "root" |
shouldStop | No | false | No | If selected, then no other queries will be executed for the given item. | true |
shouldIndex | No | false | No | If selected, the item matching this crawl rule will be indexed. | true |
queries | No | - | Yes | Crawl rules: Queries to execute inside the rule | |
scriptedQuery | Yes | false | No | Whether to use a custom scripted query | false |
groovyScan | Yes | false | No | If the current script should be able to extract new items (Required if scriptedQuery is true) | false |
queryType | Yes | metadataExtraction | No | Query type: metadataExtraction or scan (Required if scriptedQuery is true) | metadataExtraction |
groovyScript | Yes | - | No | Custom query executed with groovy (Required if scriptedQuery is true) | "rest.execute(\"/relative-to-base/action\")" |
urlTemplate | Yes | - | No | The query to execute. If ${metadataParameter} is found inside the field, it will be replaced with a specific value (for example from the scan result entity) | "/serviceEndpoint/${name}" |
method | Yes | GET | No | HTTP method. Options: GET, POST, PUT | "GET" |
body | Yes | - | No | The body of the POST or PUT request. Can include parameters to be replaced as: ${param1.paramA} (Required if method is POST or PUT) | "{\"username\" : \"${username}\",\"password\" : \"${password}\"}" |
jsonRequest | Yes | true | No | If the request body is json | true |
contentType | Yes | json | No | The body mime type: json/xml/text (Required if jsonRequest is false) | "xml" |
customHeaders | Yes | [] | Yes | Custom headers configuration for the request | [{"field": "Accept","value": "application/json"}] |
queryType | Yes | scan | No | The query type: scan/metadataExtraction/binaryFetch | "scan" |
Scan | |||||
childrenPath | No | - | No | Extraction path. The path to the response array that contains the children to extract. For example, if the response comes as {"response":{"entities":[{1},{2},{..},{n}]}} then response.entities should be used. If the array is the response itself, leave this field empty | "response.entities" |
idField | Yes | - | No | Child ID field. Field within each child holding its ID. For example, if each child has the following structure: {"entity":{"entityId":"abc-ef-1234"}, "att1":"val1"} then entity.entityId should be used | "entity.entityId" |
childType | Yes | entity | No | Type given to the items discovered by the scan query, this can be used to match a rule condition by type, if left empty 'entity' will be used | "user" |
setMetadataField | Yes | false | No | If you want to specify the metadata parent field, otherwise all the metadata will be at document root level | true |
metadataField | Yes | metadata | No | Specify the metadata parent field name (Required if setMetadataField is true) | "metadata" |
signatureFields | Yes | [] | Yes | Scan: Incremental configuration signature fields | [{"path": "$.attribute"},{"path": "$.attribute2"}] |
path | Yes | - | No | Signature Json Path (e.g. $.attribute). Json path to extract fields to use as signature. Check out https://github.com/json-path/JsonPath for JsonPath documentation | "$.attribute" |
Scan: Extended signatures | |||||
extendedSignature | Yes | false | No | Use this option if extra requests must be executed to obtain metadata needed to calculate modifications properly. Use this option carefully as this decreases the performance upon incremental crawls linearly. | true |
queries | No | - | Yes | Scan: Extended signature Queries | |
queryType | Yes | - | No | Query type - must be "metadataExtraction" | "metadataExtraction" |
urlTemplate | Yes | - | No | The query to execute | "/serviceEndpoint/${metadataParameter}" |
method | Yes | - | No | HTTP method. Options: GET, POST, PUT | "GET" |
body | Yes | - | No | The body of the POST or PUT request. Can include parameters to be replaced as: ${param1.paramA} (Required if method is POST or PUT) | "{\"username\" : \"${username}\",\"password\" : \"${password}\"}" |
signatureFields | Yes | - | Yes | Signature fields | [{"path": "$.attribute"}] |
path | Yes | - | No | Signature Json Path (e.g. $.attribute). Json path to extract fields to use as signature. Check out https://github.com/json-path/JsonPath for JsonPath documentation | "$.attribute" |
resultField | Yes | - | No | Internal name of metadata where the results will be extracted into | "field" |
Scan: Pagination | |||||
hasPagination | Yes | false | No | Enable pagination | true |
elasticLikeScrollPagination | Yes | false | No | Whether to use an Elasticsearch-style scroll id for pagination | true |
nextPageLink | Yes | false | No | If the next page link should be taken from a field in the response | true |
nextPageLinkField | Yes | - | No | Path to the next page link field in the response, use dots to use fields in hierarchical paths | "response.nextPageLink" |
isFromHeader | Yes | false | No | If the next page link is on the header, otherwise the response will be checked | true |
isRelative | Yes | false | No | If the next page link is relative, i.e: /api/data/9/children/?skip=1&limit=1 | true |
pageSize | No | 300 | No | The maximum number of entries the query retrieves per page | 100 |
containsTotal | Yes | false | No | If the response contains the total number of entities across all pages | true |
totalField | Yes | - | No | Path to the total field in the response. If the response has {"response":{"totalEntities":50000, "entities":[...]}} then response.totalEntities should be used (Required if containsTotal is true) | "response.totalEntities" |
requestParams | Yes | false | No | If pagination is controlled by request parameters | true |
queryParameters | Yes | - | No | Parameters template for pagination. Properties that you can use: ${pagination.offset} ${pagination.pageNumber} ${pagination.pageSize} (Required if requestParams is true) | start=${pagination.offset}&pageSize=${pagination.pageSize} |
startPage | Yes | 0 | No | The start index of the pagination (Required if requestParams is true) | 1 |
Metadata extraction | |||||
resultField | Yes | - | No | Internal name of metadata where the results will be extracted into | "someField" |
persistResponse | Yes | false | No | If selected the response of the current query will be stored in the state database, and can eventually be retrieved by a groovy query | true |
cache | Yes | false | No | If the cache should be enabled | true |
cacheSize | Yes | 100 | No | Maximum cache size for request (Required if cache is true) | 200 |
cacheExpiration | Yes | 3600 | No | Cache expiration in seconds (Required if cache is true) | 60 |
{ "seed": "N/A", "description": "REST seed", "connector": "93c16011-562d-4aba-a57d-31a945b3f8e5", "connection": "0ed33b76-e0ea-4ff0-ba1e-dcd25a3024c6", "throttlePolicy": null, "deleteIncrementalPolicy": null, "routingPolicies": null, "tags": [], "type": "rest-api", "properties": { "trustAllCertificates": true, "crawlRules": [ { "conditionalScript": "item.getType().toString().equals('root')", "shouldStop": false, "shouldIndex": false, "queries": [ { "scriptedQuery": false, "urlTemplate": "/connectors", "method": "GET", "jsonRequest": true, "customHeaders": [], "queryType": "scan", "scan": { "childrenPath": "connector", "idField": "id", "childType": "user", "setMetadataField": true, "metadataField": "searchFields", "signatureFields": [], "extendedSignature": false, "hasPagination": true, "elasticLikeScrollPagination": false, "nextPageLink": false, "pageSize": 100, "containsTotal": false, "requestParams": true, "queryParameters": "page=${pagination.pageNumber}&per_page=${pagination.pageSize}", "startPage": 0 } } ] }, { "conditionalScript": "false", "entityType": "user", "shouldStop": false, "shouldIndex": true } ] } }
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
id | Yes | - | No | Id of the seed to update | "2f287669-d163-4e35-ad17-6bbfe9df3778" |
(see the "Create seed" for other fields) |
{ "id": "2f287669-d163-4e35-ad17-6bbfe9df3778", "seed": "N/A", "description": "REST seed", "connector": "93c16011-562d-4aba-a57d-31a945b3f8e5", "connection": "0ed33b76-e0ea-4ff0-ba1e-dcd25a3024c6", "throttlePolicy": null, "deleteIncrementalPolicy": null, "routingPolicies": null, "tags": [], "type": "rest-api", "properties": { "trustAllCertificates": true, "crawlRules": [ { "conditionalScript": "item.getType().toString().equals('root')", "shouldStop": false, "shouldIndex": false, "queries": [ { "scriptedQuery": false, "urlTemplate": "/connectors", "method": "GET", "jsonRequest": true, "customHeaders": [], "queryType": "scan", "scan": { "childrenPath": "connector", "idField": "id", "childType": "user", "setMetadataField": true, "metadataField": "searchFields", "signatureFields": [], "extendedSignature": false, "hasPagination": true, "elasticLikeScrollPagination": false, "nextPageLink": false, "pageSize": 100, "containsTotal": false, "requestParams": true, "queryParameters": "page=${pagination.pageNumber}&per_page=${pagination.pageSize}", "startPage": 0 } } ] }, { "conditionalScript": "false", "entityType": "user", "shouldStop": false, "shouldIndex": true } ] } }