The following authentication types are supported for crawled repositories:
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
type | yes | - | no | The value must be "rest-api". | "rest-api" |
description | yes | - | no | Name of the credential object. | "My REST Credential" |
properties | yes | - | no | Configuration object | |
type | yes | - | no | Authentication type: basic, apiToken, bearer, azure, none. | basic |
type: basic | |||||
loginAccount | yes | - | no | Username. | "admin" |
password | yes | - | no | Password (can be encrypted in Aspire fashion). | "adminPassword" |
type: apiToken | |||||
headerName | yes | - | no | The name of the HTTP header field to be sent with a request. | "tokenName1" |
headerValue | yes | - | no | The value of the "headerName" field. | "tokenValue1" |
type: bearer | |||||
query | yes | - | no | bearer query: JSON object representing the query to be sent for getting the accessToken | |
urlTemplate | yes | - | no | The context path of the URL. | "/login" |
loginAccount | yes | - | no | Username. Used as a value for ${loginAccount} query body field. | "admin" |
password | yes | - | no | Password. Used as a value for ${password} query body field. | "adminPassword" |
method | yes | - | no | HTTP method. Must be POST in this version. | "POST" |
body | yes | - | no | The query body. Fields: ${loginAccount}, ${password} are expected to be used as a part of the body. | "{\"username\" : \"${username}\",\"password\" : \"${password}\"}" |
resultField | yes | - | no | The field in the response with the access token. | "accessToken" |
isFormBody | yes | false | no | Select if the request uses a Form body instead of a JSON body. | false |
setExpiresField | yes | false | no | Select if you want to specify the expires field. | false |
expiresType | yes | at | no | Select the type of token expiration: "at", "in". | "at" |
expiresAtField | no | - | no | The name of the field of the generated token (Only if expiresType is "at"). | "token" |
expiresAtFormat | no | yyyy-MM-dd'T'HH:mm:ss.SSSXXX | no | The date format of the generated token (Only if expiresType is "at"). | "yyyy-MM-dd'T'HH:mm:ss.SSSXXX" |
expiresInTime | no | false | no | Select if you want to specify the expiration of the token in a specific time (Only if expiresType is "in"). | false |
expiresInField | no | - | no | The name of the field of the generated token (Only if expiresType is "in"). | "token" |
timeType | no | - | no | The unit of the timestamp: "millis", "seconds", "minutes", "hours" (Only if expiresType is "in" and expiresInTime is "false"). | "millis" |
expiresInTimeType | no | - | no | The unit of the timestamp: "millis", "seconds", "minutes", "hours" (Only if expiresType is "in" and expiresInTime is "true"). | "minutes" |
expiresInField | no | - | no | The amount of time you want to wait for the token expiration (Only if expiresType is "in" and expiresInTime is "true"). | "10" |
setHeader | yes | false | no | Select if you intend to specify the Authorization header name | false |
headerName | yes | Authorization | no | Authorization header name | "Authorization" |
customHeaders | yes | empty | no | Additional Headers | [{"field": "Accept", "value": "application/json"}] |
type: azure | |||||
client_id | yes | - | no | The Application (client) ID that the Azure portal - App registrations page assigned to your app | "1234ab567-89cd0" |
client_secret | yes | - | no | The credentials that authenticate the application | "1234ab567-89cd0" |
scope | yes | - | no | A space-separated list of scopes, or permissions, that the app requires | "user.read%20openid%20profile%20offline_access" |
urlTemplate | yes | - | no | The directory tenant that you want to log the user into. This can be in GUID or friendly name format | "myTenant" |
Code Block | ||||
---|---|---|---|---|
| ||||
{ "type": "rest-api", "description": "My credential", "properties": { "type": "bearer", "query": { "urlTemplate": "/login", "username": "admin", "password": "encrypted:xxxxx", "method": "POST", "body": "{\"username\": \"${username}\", \"password\": \"${password}\"}", "resultField": "accessToken", "isFormBody": false, "setExpiresField": true, "expiresType": "in", "expiresInTime": true, "expiresInTimeType": "minutes", "expiresInField": "10", "setHeader": true, "headerName": "X-Tableau-Auth", "customHeaders": [ { "field": "Accept", "value": "application/json" } ] } } } |
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
type | Yes | - | No | The value must be "rest-api". | "rest-api" |
description | Yes | - | No | Name of the connection object. | "My REST Connection" |
throttlePolicy | No | - | No | ID of the throttle policy that applies to this connection object. | "6b235b333a1b" |
routingPolicies | No | [ ] | Yes | The IDs of the routing policies that this connection will use. | ["17f75ce7d0c7", "d42780003b36"] |
deleteIncrementalPolicy | No | - | No | ID of the delete policy that applies to this connection object. | "6b235b333a1b" |
credential | Yes | - | No | ID of the credential | "6b235b333a1b" |
properties | Yes | - | No | Configuration object | |
baseUrl | Yes | - | No | Your REST service API URL | "https://your-service/api/v2/" |
connectionTimeout | Yes | 10000 | No | The period of time (ms) after which the connection will be closed. | 10000 |
socketTimeout | Yes | 10000 | No | The period of time (ms) after which the socket will be closed. | 10000 |
maxRetries | Yes | 3 | No | The number of connection retries | 3 |
useThrottling | Yes | false | No | If you want to use connection Throttling | false |
throttling | Yes | throttling_false | No | Constant required if useThrottling is false | "throttling_false" |
throttlingConnectionRate | No | 500 | No | The number of calls that your API can receive within a fixed period of time (Required if useThrottling is true) | 500 |
throttlingRate | No | 5000 | No | The period of fixed time in which the number of API calls can be received (in milliseconds) ( Required if useThrottling is true) | 5000 |
useProxy | Yes | false | No | If you want to configure a proxy | false |
proxy | Yes | proxy_false | No | Constant required if useProxy is false | "proxy_false" |
proxyProtocol | Yes | http | No | The protocol of the proxy connection | "http" |
proxyHost | Yes | - | No | Your proxy host | "myHostName" |
proxyPort | Yes | 3128 | No | Your proxy port | 3128 |
useProxyAuth | Yes | false | No | If you want to configure a proxy authentication | false |
proxyAuth | Yes | proxyAuth_false | No | Constant required if useProxyAuth is false | "proxyAuth_false" |
proxyUser | Yes | - | No | Proxy Username | "Administrator" |
proxyPass | Yes | - | No | Proxy Password | "adminPassword" |
trustAllCertificates | Yes | false | No | If selected, no HTTPS certificate validation will be done. | true |
Code Block | ||||
---|---|---|---|---|
| ||||
{ "type": "rest-api", "description": "Rest conn 3", "credential": "0b6fd9c8-d722-4874-aca1-e57c6eff2089", "properties": { "baseUrl": "http://aspire_manager:50443/aspire/_api", "connectionTimeout": 10000, "socketTimeout": 10000, "maxRetries": 3, "useThrottling": false, "throttling": "throttling_false", "useProxy": false, "proxy": "proxy_false", "trustAllCertificates": false } } |
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
id | Yes | - | No | ID of the connection to update | "d442adcab4b0" |
description | No | - | No | Name of the connection object. | "My REST Connection" |
throttlePolicy | No | - | No | ID of the throttle policy that applies to this connection object. | "b3a9-6b235b333a1b" |
routingPolicies | No | [ ] | Yes | The IDs of the routing policies that this connection will use. | ["17f75ce7d0c7", "d42780003b36"] |
credential | No | - | No | ID of the credential | "6b235b333a1b" |
properties | No | - | No | Configuration object | |
(see create connection) |
Code Block | ||||
---|---|---|---|---|
| ||||
{ "id": "89d6632a-a296-426c-adb0-d442adcab4b0", "description": "REST connection", "properties": { "baseUrl": "http://aspire_manager:50443/aspire/_api" } } |
For the creation of the Connector object using the REST API, refer to this page
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
seed | Yes | - | No | N/A | "N/A" |
type | Yes | - | No | The value must be "rest-api". | "rest-api" |
description | Yes | - | No | Name of the seed object. | "My REST Seed" |
connector | Yes | - | No | The ID of the connector to be used with this seed. The connector type must match the seed type. | "e3ca414b0d31" |
connection | Yes | - | No | The ID of the connection to be used with this seed. The connection type must match the seed type. | "e4a663fe9ee6" |
workflows | No | [ ] | Yes | The IDs of the workflows that will be executed for the documents crawled. | ["5696c3f0bda4"] |
throttlePolicy | No | - | No | ID of the throttle policy that applies to this seed object. | "6b235b333a1b" |
routingPolicies | No | [ ] | Yes | The IDs of the routing policies that this seed will use. | ["17f75ce7d0c7", "d42780003b36"] |
deleteIncrementalPolicy | No | - | No | ID of the delete policy that applies to this connection object. | "6b235sd23423b" |
tags | No | [ ] | Yes | The tags of the seed. These can be used to filter the seed | ["tag1", "tag2"] |
properties | Yes | - | No | Configuration object | |
seed | Yes | - | No | N/A | "N/A" |
crawlRules | Yes | - | Yes | Crawl rules | |
conditionalScript | Yes | true | No | Groovy script to determine whether a given item should execute this set of queries. The following matches the root item: item.getType().toString().equals('root'). The following matches any extracted entity from a scan: item.getType().toString().equals('entity') | "item.getType().toString().equals('root')" |
entityType | Yes | - | No | Entity type to match (required if conditionalScript is false) | "root" |
shouldStop | No | false | No | If selected, then no other queries will be executed for the given item. | true |
shouldIndex | No | false | No | If selected, the item matching this crawl rule will be indexed. | true |
queries | No | - | Yes | Crawl rules: Queries to execute inside the rule | |
scriptedQuery | Yes | false | No | If you use custom scripted query | false |
groovyScan | Yes | false | No | If the current script should be able to extract new items (Required if scriptedQuery is true) | false |
queryType | Yes | metadataExtraction | No | Query type: metadataExtraction or scan (Required if scriptedQuery is true) | metadataExtraction |
groovyScript | Yes | - | No | Custom query executed with groovy (Required if scriptedQuery is true) | "rest.execute(\"/relative-to-base/action\")" |
urlTemplate | Yes | - | No | The query to execute. If ${metadataParameter} is found inside the field, it will be replaced with a specific value (for example from the scan result entity) | "/serviceEndpoint/${name}" |
method | Yes | GET | No | HTTP method. Options: GET, POST, PUT | "GET" |
body | Yes | - | No | The body of the POST or PUT request. Can include parameters to be replaced as: ${param1.paramA} (Required if method is POST or PUT) | "{\"username\" : \"${username}\",\"password\" : \"${password}\"}" |
jsonRequest | Yes | true | No | If the request body is JSON | true |
contentType | Yes | json | No | The body MIME type: json/xml/text (Required if jsonRequest is false) | "xml" |
customHeaders | Yes | [] | Yes | Custom headers' configuration for the request | [{"field": "Accept","value": "application/json"}] |
queryType | Yes | scan | No | The query type: scan/metadataExtraction/binaryFetch | "scan" |
Scan | |||||
childrenPath | No | - | No | Extraction path. The path to the response array that contains the children to extract. For example, if the response comes as {"response":{"entities":[{1},{2},{..},{n}]}} then response.entities should be used. If the array is the response, then leave this field empty | "response.entities" |
idField | Yes | - | No | Child ID field. Field within each child holding its ID. For example, if each child has the following structure: {"entity":{"entityId":"abc-ef-1234"}, "att1":"val1"} then entity.entityId should be used | "entity.entityId" |
childType | Yes | entity | No | Type given to the items discovered by the scan query, this can be used to match a rule condition by type, if left empty, 'entity' will be used | "user" |
setMetadataField | Yes | false | No | If you want to specify the metadata parent field, otherwise all the metadata will be at document root level | true |
metadataField | Yes | metadata | No | Specify the metadata parent field name (Required if setMetadataField is true) | "metadata" |
signatureFields | Yes | [] | Yes | Scan: Incremental configuration signature fields | [{"path": "$.attribute"},{"path": "$.attribute2"}] |
path | Yes | - | No | Signature JSON Path (e.g. $.attribute). JSON path to extract fields to use as signature. Check out https://github.com/json-path/JsonPath for JsonPath documentation | "$.attribute" |
Scan: Extended signatures | |||||
extendedSignature | Yes | false | No | Use this option if extra requests must be executed to obtain the metadata needed to calculate modifications properly. Use this option carefully, as it decreases the performance of incremental crawls linearly. | true |
queries | No | - | Yes | Scan: Extended signature Queries | |
queryType | Yes | - | No | Query type - must be "metadataExtraction" | "metadataExtraction" |
urlTemplate | Yes | - | No | The query to execute. | "/serviceEndpoint/${metadataParameter}" |
method | Yes | - | No | HTTP method. Options: GET, POST, PUT | "GET" |
body | Yes | - | No | The body of the POST or PUT request. Can include parameters to be replaced as: ${param1.paramA} (Required if method is POST or PUT) | "{\"username\" : \"${username}\",\"password\" : \"${password}\"}" |
signatureFields | Yes | - | Yes | Signature fields. | [{"path": "$.attribute"}] |
path | Yes | - | No | Signature JSON Path (e.g. $.attribute). JSON path to extract fields to use as signature. Check out https://github.com/json-path/JsonPath for JsonPath documentation. | "$.attribute" |
resultField | Yes | - | No | Internal name of metadata where the results will be extracted into. | "field" |
Scan: Pagination | |||||
hasPagination | Yes | false | No | Enable pagination | true |
elasticLikeScrollPagination | Yes | false | No | If you use Elasticsearch scroll ID for pagination | true |
nextPageLink | Yes | false | No | If the next page link should be taken from a field in the response | true |
nextPageLinkField | Yes | - | No | Path to the next page link field in the response, use dots to use fields in hierarchical paths | "response.nextPageLink" |
isFromHeader | Yes | false | No | If the next page link is on the header, otherwise the response will be checked | true |
isRelative | Yes | false | No | If the next page link is relative, e.g. /api/data/9/children/?skip=1&limit=1 | true |
pageSize | No | 300 | No | The maximum number of entries the query retrieves per page | 100 |
containsTotal | Yes | false | No | If the response contains the total number of entities across all pages | true |
totalField | Yes | - | No | Path to the total field in the response. If the response has {"response":{"totalEntities":50000, "entities":[...]}} then response.totalEntities should be used (Required if containsTotal is true) | "response.totalEntities" |
requestParams | Yes | false | No | If pagination is controlled by request parameters | true |
queryParameters | Yes | - | No | Parameters template for pagination. Properties that you can use: ${pagination.offset} ${pagination.pageNumber} ${pagination.pageSize} (Required if requestParams is true) | start=${pagination.offset}&pageSize=${pagination.pageSize} |
startPage | Yes | 0 | No | The start index of the pagination (Required if requestParams is true) | 1 |
Metadata extraction | |||||
resultField | Yes | - | No | Internal name of metadata where the results will be extracted into | "someField" |
persistResponse | Yes | false | No | If selected, the response of the current query will be stored in the state database, and can eventually be retrieved by a groovy query | true |
cache | Yes | false | No | If cache should be enabled | true |
cacheSize | Yes | 100 | No | Maximum cache size for request (Required if cache is true) | 200 |
cacheExpiration | Yes | 3600 | No | Cache expiration in seconds (Required if cache is true) | 60 |
Code Block | ||||
---|---|---|---|---|
| ||||
{ "seed": "N/A", "description": "REST seed", "connector": "93c16011-562d-4aba-a57d-31a945b3f8e5", "connection": "0ed33b76-e0ea-4ff0-ba1e-dcd25a3024c6", "throttlePolicy": null, "deleteIncrementalPolicy": null, "routingPolicies": null, "tags": [], "type": "rest-api", "properties": { "trustAllCertificates": true, "crawlRules": [ { "conditionalScript": "item.getType().toString().equals('root')", "shouldStop": false, "shouldIndex": false, "queries": [ { "scriptedQuery": false, "urlTemplate": "/connectors", "method": "GET", "jsonRequest": true, "customHeaders": [], "queryType": "scan", "scan": { "childrenPath": "connector", "idField": "id", "childType": "user", "setMetadataField": true, "metadataField": "searchFields", "signatureFields": [], "extendedSignature": false, "hasPagination": true, "elasticLikeScrollPagination": false, "nextPageLink": false, "pageSize": 100, "containsTotal": false, "requestParams": true, "queryParameters": "page=${pagination.pageNumber}&per_page=${pagination.pageSize}", "startPage": 0 } } ] }, { "conditionalScript": "false", "entityType": "user", "shouldStop": "false", "shouldIndex": "true" } ] } } |
Field | Required | Default | Multiple | Notes | Example |
---|---|---|---|---|---|
id | Yes | - | No | ID of the seed to update | "2f287669-d163-4e35-ad17-6bbfe9df3778" |
(see the "Create seed" for other fields) |
Code Block | ||||
---|---|---|---|---|
| ||||
{ "id": "2f287669-d163-4e35-ad17-6bbfe9df3778", "seed": "N/A", "description": "REST seed", "connector": "93c16011-562d-4aba-a57d-31a945b3f8e5", "connection": "0ed33b76-e0ea-4ff0-ba1e-dcd25a3024c6", "throttlePolicy": null, "deleteIncrementalPolicy": null, "routingPolicies": null, "tags": [], "type": "rest-api", "properties": { "trustAllCertificates": true, "crawlRules": [ { "conditionalScript": "item.getType().toString().equals('root')", "shouldStop": false, "shouldIndex": false, "queries": [ { "scriptedQuery": false, "urlTemplate": "/connectors", "method": "GET", "jsonRequest": true, "customHeaders": [], "queryType": "scan", "scan": { "childrenPath": "connector", "idField": "id", "childType": "user", "setMetadataField": true, "metadataField": "searchFields", "signatureFields": [], "extendedSignature": false, "hasPagination": true, "elasticLikeScrollPagination": false, "nextPageLink": false, "pageSize": 100, "containsTotal": false, "requestParams": true, "queryParameters": "page=${pagination.pageNumber}&per_page=${pagination.pageSize}", "startPage": 0 } } ] }, { "conditionalScript": "false", "entityType": "user", "shouldStop": "false", "shouldIndex": "true" } ] } } |