Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Comment: Updated Seed section

The REST Connector can be configured using the Rest API. It requires the following entities to be created:

  • Credential
  • Connection
  • Connector
  • Seed

Below are the examples of how to create the Credential, Connection and the Seed. For the general connectors' configuration, please check this page.

Easy Heading Free
navigationTitleOn this Page
navigationExpandOptionexpand-all-by-default

Create Credential

We support these crawled repositories authentication types:

  • Basic - see here
  • Bearer - see here
  • API Token - can be used as an alternative to Bearer. A configured header field is sent as part of each request
  • Azure Credentials
  • None

FieldRequiredDefaultMultipleNotesExample
typeyes-noThe value must be "rest-api"."rest-api"
descriptionyes-noName of the credential object."My REST Credential"
propertiesyes-noConfiguration object
typeyes-noAuthentication type: basic, apiToken, bearer, azure, none.basic




type: basic
loginAccountyes-noUsername."admin"
passwordyes-noPassword (can be encrypted in Aspire fashion)."adminPassword"




type: apiToken
headerNameyes-noThe name of HTTP header field to be sent with a request."tokenName1"
headerValueyes-noThe value of the "headerName" field."tokenValue1"




type: bearer
queryyes-nobearer query: JSON object representing the query to be sent for getting the accessToken
urlTemplateyes-noThe context path of the URL."/login"
loginAccountyes-noUsername. Used as a value for ${loginAccount} query body field."admin"
passwordyes-noPassword. Used as a value for ${password} query body field."adminPassword"
methodyes-noHTTP method. Must be POST in this version."POST"
bodyyes-noThe query body. Fields: ${loginAccount}, ${password} are expected to be used as a part of the body."{\"username\" : \"${username}\",\"password\" : \"${password}\"}"
resultFieldyes-noThe field in the response with the access token."accessToken"
isFormBodyyesfalsenoSelect if the request uses a Form body instead of a JSON body.false
setExpiresFieldyesfalsenoSelect if you want to specify the expires field.false
expiresTypeyesatnoSelect the type of token expiration: "at", "in"."at"
expiresAtFieldno-noThe name of the field of the generated token (Only if expiresType is "at")."token"
expiresAtFormatnoyyyy-MM-dd'T'HH:mm:ss.SSSXXXnoThe date format of the generated token (Only if expiresType is "at")."yyyy-MM-dd'T'HH:mm:ss.SSSXXX"
expiresInTimenofalsenoSelect if you want to specify the expiration of the token in a specific time (Only if expiresType is "in").false
expiresInFieldno-noThe name of the field of the generated token (Only if expiresType is "in")."token"
timeTypeno-noThe unit of the timestamp: "millis", "seconds", "minutes", "hours" (Only if expiresType is "in" and expiresInTime is "false")."millis"
expiresInTimeTypeno-noThe unit of the timestamp: "millis", "seconds", "minutes", "hours" (Only if expiresType is "in" and expiresInTime is "true")."minutes"
expiresInFieldno-noThe amount of time you want to wait for the token expiration (Only if expiresType is "in" and expiresInTime is "true")."10"
setHeaderyesfalsenoSelect if you want to specify the Authorization header namefalse
headerNameyesAuthorizationno Authorization header name"Authorization"
customHeadersyesemptynoAdditional Headers [{"field": "Accept", "value": "application/json"}]




type: azure
client_idyes-noThe Application (client) ID that the Azure portal - App registrations page assigned to your app
"1234ab567-89cd0"
client_secretyes-noThe credentials that authenticate the application
"1234ab567-89cd0"
scopeyes-noA space-separated list of scopes, or permissions, that the app requires"user.read%20openid%20profile%20offline_access"
urlTemplateyes-noThe directory tenant that you want to log the user into. This can be in GUID or friendly name format"myTenant"

Example

Code Block
themeRDark
titlePOST aspire/_api/credentials
{
    "type": "rest-api",
    "description": "My credential",
    "properties": {
        "type": "bearer",
        "query": {
            "urlTemplate": "/login",
            "username": "admin",
            "password": "encrypted:xxxxx",
            "method": "POST",
            "body": "{\"username\": \"${username}\", \"password\": \"${password}\"}",
            "resultField": "accessToken",
			"isFormBody": false,
      		"setExpiresField": true,
		    "expiresType": "in",
		    "expiresInTime": true,
      		"expiresInTimeType": "minutes",
      		"expiresInField": "10",
      		"setHeader": true,
      		"headerName": "X-Tableau-Auth",
      		"customHeaders": [
        		{
          			"field": "Accept",
          			"value": "application/json"
        		}
      		]          
		}
    }
}

Create Connection


FieldRequiredDefaultMultipleNotesExample
typeYes-NoThe value must be "rest-api"."rest-api"
descriptionYes-NoName of the connection object."My REST Connection"
throttlePolicyNo-NoId of the throttle policy that applies to this connection object."6b235b333a1b"
routingPoliciesNo[ ]YesThe ids of the routing policies that this connection will use.["17f75ce7d0c7", "d42780003b36"]
deleteIncrementalPolicyNo-NoId of the delete policy that applies to this connection object."6b235b333a1b"
credentialYes-NoId of the credential"6b235b333a1b"
propertiesYes-NoConfiguration object
baseUrlYes-NoYour rest service API url"https://your-service/api/v2/"
connectionTimeoutYes10000NoThe period of time (ms) after which the connection will be closed10000
socketTimeoutYes10000NoThe period of time (ms) after which the socket will be closed10000
maxRetriesYes3NoThe number of connection retries3
useThrottlingYesfalseNoIf you want to use connection Throttlingfalse
throttlingYesthrottling_falseNoConstant required if useThrottling is false"throttling_false"
throttlingConnectionRateNo500NoThe number of calls that your API can receive within a fixed period of time (Required if useThrottling is true)500
throttlingRateNo5000NoThe period of fixed time in which the number of API calls can be received (in milliseconds) ( Required if useThrottling is true)5000
useProxyYesfalseNoIf you want to configure a proxyfalse
proxyYesproxy_falseNoConstant required if useProxy is false"proxy_false"
proxyProtocolYeshttpNoThe protocol of the proxy connection"http"
proxyHostYes-NoYour proxy host"myHostName"
proxyPortYes3128NoYour proxy port3128
useProxyAuthYesfalseNoIf you want to configure a proxy authenticationfalse
proxyAuthYesproxyAuth_falseNoConstant required if useProxyAuth is false"proxyAuth_false"
proxyUserYes-NoProxy Username"Administrator"
proxyPassYes-NoProxy Password"adminPassword"
trustAllCertificatesYesfalseNoIf selected, no HTTPS certificate validation will be done.true

Example

Code Block
themeRDark
titlePOST aspire/_api/connections
{
    "type": "rest-api",
    "description": "Rest conn 3",
    "credential": "0b6fd9c8-d722-4874-aca1-e57c6eff2089",
    "properties": {
        "baseUrl": "http://aspire_manager:50443/aspire/_api",
		"connectionTimeout": 10000,
	    "socketTimeout": 10000,
    	"maxRetries": 3,
	    "useThrottling": false,
    	"throttling": "throttling_false",
	    "useProxy": false,
	    "proxy": "proxy_false",
    	"trustAllCertificates": false
     }
}

Update Connection


FieldRequiredDefaultMultipleNotesExample
idYes-NoId of the connection to update"d442adcab4b0",
descriptionNo-NoName of the connection object."My REST Connection"
throttlePolicyNo-NoId of the throttle policy that applies to this connection object."b3a9-6b235b333a1b"
routingPoliciesNo[ ]YesThe ids of the routing policies that this connection will use.["17f75ce7d0c7", "d42780003b36"]
credentialNo-NoId of the credential"6b235b333a1b"
propertiesNo-NoConfiguration object
(see create connection)




Example

Code Block
themeRDark
titlePUT aspire/_api/connections/89d6632a-a296-426c-adb0-d442adcab4b0
{
   "id": "89d6632a-a296-426c-adb0-d442adcab4b0",
   "description": "REST connection",
     "properties": {
        "baseUrl": "http://aspire_manager:50443/aspire/_api"
    } 
}

Create Connector


For the creation of the Connector object using the Rest API check this page

Update Connector


For the update of the Connector object using the Rest API check this page

Create Seed


FieldRequiredDefaultMultipleNotesExample
seedYes-NoN/A"N/A"
typeYes-NoThe value must be "rest-api"."rest-api"
descriptionYes-NoName of the seed object."My REST Seed"
connectorYes-NoThe id of the connector to be used with this seed. The connector type must match the seed type."e3ca414b0d31"
connectionYes-NoThe id of the connection to be used with this seed. The connection type must match the seed type."e4a663fe9ee6"
workflowsNo[ ]YesThe ids of the workflows that will be executed for the documents crawled.["5696c3f0bda4"]
throttlePolicyNo-NoId of the throttle policy that applies to this seed object."6b235b333a1b"
routingPoliciesNo[ ]YesThe ids of the routing policies that this seed will use.["17f75ce7d0c7", "d42780003b36"]
deleteIncrementalPolicyNo-NoId of the delete policy that applies to this seed object."6b235sd23423b"
tagsNo[ ]YesThe tags of the seed. These can be used to filter the seed["tag1", "tag2"]
propertiesYes-NoConfiguration object
seedYes-NoN/A"N/A"
crawlRulesYes-YesCrawl rules
conditionalScriptYestrueNo

Groovy script to determine if a given item should execute this set of queries.

The following matches the root item: item.getType().toString().equals('root')

The following matches any extracted entity from a scan: item.getType().toString().equals('entity')

"item.getType().toString().equals('root')"
entityTypeYes-NoEntity type to match (required if conditionalScript is false)"root"
shouldStopNofalseNoIf selected, then no other queries will be executed for the given item.true
shouldIndexNofalseNo

If selected, the item matching this crawl rule will be indexed.

true
queriesNo-YesCrawl rules: Queries to execute inside the rule
scriptedQueryYesfalseNoWhether to use a custom scripted queryfalse
groovyScanYesfalseNoIf the current script should be able to extract new items (Required if scriptedQuery is true)false
queryTypeYesmetadataExtractionNoQuery type: metadataExtraction or scan (Required if scriptedQuery is true)metadataExtraction
groovyScriptYes-NoCustom query executed with groovy (Required if scriptedQuery is true)"rest.execute(\"/relative-to-base/action\")"
urlTemplateYes-NoThe query to execute. If ${metadataParameter} is found inside the field, it will be replaced with a specific value (for example from the scan result entity)"/serviceEndpoint/${name}"
methodYesGETNoHTTP method. Options: GET, POST, PUT"GET"
body Yes -NoThe body of the POST or PUT body. Can include parameters to be replaced as: ${param1.paramA} (Required if method POST or PUT)"{\"username\" : \"${username}\",\"password\" : \"${password}\"}"
jsonRequestYestrueNoIf the request body is jsontrue
contentTypeYesjsonNoThe body mime type: json/xml/text (Required if jsonRequest is false)"xml"
customHeadersYes[]YesCustom headers configuration for the request[{"field": "Accept","value": "application/json"}]
queryTypeYesscanNoThe query type: scan/metadataExtraction/binaryFetch"scan"




Scan
childrenPathNo-NoExtraction path. The path to the response array that contains the children to extract. For example, if the response comes as {"response":{"entities":[{1},{2},{..},{n}]}} response.entities should be used. If the array is the response, then leave this field empty"response.entities"
idFieldYes-NoChild ID field. Field within each child holding its ID. For example, if each child has the following structure: {"entity":{"entityId":"abc-ef-1234"}, "att1":"val1"} then entity.entityId should be used"entity.entityId"
childTypeYesentityNoType given to the items discovered by the scan query, this can be used to match a rule condition by type, if left empty 'entity' will be used"user"
setMetadataFieldYesfalseNoIf you want to specify the metadata parent field, otherwise all the metadata will be at document root leveltrue
metadataFieldYesmetadataNoSpecify the metadata parent field name (Required if setMetadataField is true)"metadata"
signatureFieldsYes[]YesScan: Incremental configuration signature fields[{"path": "$.attribute"},{"path": "$.attribute2"}]
pathYes-NoSignature Json Path (e.g. $.attribute).  Json path to extract fields to use as signature. Check out https://github.com/json-path/JsonPath for JsonPath documentation"$.attribute"




Scan: Extended signatures
extendedSignatureYesfalseNoUse this option if extra requests must be executed to obtain metadata needed to calculate modifications properly. Use this option carefully as this decreases the performance upon incremental crawls linearly.true
queriesNo-YesScan: Extended signature Queries
queryTypeYes-NoQuery type - must be "metadataExtraction""metadataExtraction"
urlTemplateYes-NoThe query to execute"/serviceEndpoint/${metadataParameter}"
methodYes-NoHTTP method. Options: GET, POST, PUT"GET"
body Yes -NoThe body of the POST or PUT body. Can include parameters to be replaced as: ${param1.paramA} (Required if method POST or PUT)"{\"username\" : \"${username}\",\"password\" : \"${password}\"}"
signatureFieldsYes-YesSignature fields[{"path": "$.attribute"}]
pathYes-NoSignature Json Path (e.g. $.attribute).  Json path to extract fields to use as signature. Check out https://github.com/json-path/JsonPath for JsonPath documentation"$.attribute"
resultFieldYes-NoInternal name of metadata where the results will be extracted into"field"




Scan: Pagination
hasPaginationYesfalseNoEnable paginationtrue
elasticLikeScrollPaginationYesfalseNoIf use Elasticsearch scroll id for paginationtrue
nextPageLinkYesfalseNoIf the next page link should be taken from a field in the responsetrue
nextPageLinkFieldYes-NoPath to the next page link field in the response, use dots to use fields in hierarchical paths"response.nextPageLink"
isFromHeaderYesfalseNoIf the next page link is on the header, otherwise the response will be checkedtrue
isRelativeYesfalseNoIf the next page link is relative, i.e: /api/data/9/children/?skip=1&limit=1true
pageSizeNo300NoThe maximum number of entries the query retrieves per page100
containsTotalYesfalseNoIf the response has as a response the total number of entities in all pagestrue
totalFieldYes-NoPath to the total field in the response. If the response has {"response":{"totalEntities":50000, "entities":[...]}} then response.totalEntities should be used (Required if containsTotal is true)"response.totalEntities"
requestParamsYesfalseNoIf pagination is controlled by request parameterstrue
queryParametersYes-NoParameters template for pagination. Properties that you can use: ${pagination.offset} ${pagination.pageNumber} ${pagination.pageSize} (Required if requestParams is true)start=${pagination.offset}&pageSize=${pagination.pageSize}
startPageYes0NoThe start index of the pagination (Required if requestParams is true)1




Metadata extraction
resultFieldYes-NoInternal name of metadata where the results will be extracted into"someField"
persistResponseYesfalseNoIf selected the response of the current query will be stored in the state database, and can eventually be retrieved by a groovy querytrue
cacheYesfalseNoIf cache should be enabledtrue
cacheSizeYes100NoMaximum cache size for request (Required if cache is true)200
cacheExpirationYes3600NoCache expiration in seconds (Required if cache is true)60

Example

Code Block
themeRDark
titlePOST aspire/_api/seeds
{
    "seed": "N/A",
    "description": "REST seed",
    "connector": "93c16011-562d-4aba-a57d-31a945b3f8e5",
    "connection": "0ed33b76-e0ea-4ff0-ba1e-dcd25a3024c6",
    "throttlePolicy": null,
    "deleteIncrementalPolicy": null,
    "routingPolicies": null,
    "tags": [],
    "type": "rest-api",
    "properties": {
        "trustAllCertificates": true,
        "crawlRules": [
            {
                "conditionalScript": "item.getType().toString().equals('root')",
                "shouldStop": false,
                "shouldIndex": false,
                "queries": [
                    {
                        "scriptedQuery": false,
                        "urlTemplate": "/connectors",
                        "method": "GET",
                        "jsonRequest": true,
                        "customHeaders": [],
                        "queryType": "scan",
                        "scan": {
                            "childrenPath": "connector",
                            "idField": "id",
                            "childType": "user",
                            "setMetadataField": true,
                            "metadataField": "searchFields",
                            "signatureFields": [],
                            "extendedSignature": false,
                            "hasPagination": true,
                            "elasticLikeScrollPagination": false,
                            "nextPageLink": false,
                            "pageSize": 100,
                            "containsTotal": false,
                            "requestParams": true,
                            "queryParameters": "page=${pagination.pageNumber}&per_page=${pagination.pageSize}",
                            "startPage": 0
                        }
                    }
                ]
            },
            {
                "conditionalScript": "false",
                "entityType": "user",
                "shouldStop": "false",
                "shouldIndex": "true"
            }
        ]
    }
}

Update Seed


FieldRequiredDefaultMultipleNotesExample
idYes-NoId of the seed to update"2f287669-d163-4e35-ad17-6bbfe9df3778"
(see the "Create seed" for other fields)




Example

Code Block
themeRDark
titlePUT aspire/_api/seeds/2f287669-d163-4e35-ad17-6bbfe9df3778
{
    "id": "2f287669-d163-4e35-ad17-6bbfe9df3778",
    "seed": "N/A",
    "description": "REST seed",
    "connector": "93c16011-562d-4aba-a57d-31a945b3f8e5",
    "connection": "0ed33b76-e0ea-4ff0-ba1e-dcd25a3024c6",
    "throttlePolicy": null,
    "deleteIncrementalPolicy": null,
    "routingPolicies": null,
    "tags": [],
    "type": "rest-api",
    "properties": {
        "trustAllCertificates": true,
        "crawlRules": [
            {
                "conditionalScript": "item.getType().toString().equals('root')",
                "shouldStop": false,
                "shouldIndex": false,
                "queries": [
                    {
                        "scriptedQuery": false,
                        "urlTemplate": "/connectors",
                        "method": "GET",
                        "jsonRequest": true,
                        "customHeaders": [],
                        "queryType": "scan",
                        "scan": {
                            "childrenPath": "connector",
                            "idField": "id",
                            "childType": "user",
                            "setMetadataField": true,
                            "metadataField": "searchFields",
                            "signatureFields": [],
                            "extendedSignature": false,
                            "hasPagination": true,
                            "elasticLikeScrollPagination": false,
                            "nextPageLink": false,
                            "pageSize": 100,
                            "containsTotal": false,
                            "requestParams": true,
                            "queryParameters": "page=${pagination.pageNumber}&per_page=${pagination.pageSize}",
                            "startPage": 0
                        }
                    }
                ]
            },
            {
                "conditionalScript": "false",
                "entityType": "user",
                "shouldStop": "false",
                "shouldIndex": "true"
            }
        ]
    }
}