Page History
...
Code Block | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|
| ||||||||||
public static void main(String[] args) { //Get the scan properties from the arguments String scanPropertiesFile = args[0]; //Instantiate a new RepositoryAccessProvider from the connector implementation JobFactory.initializeForStandAloneUsage(); ComponentImpl component = new SP2013RAP(); component.initialize(emptyDom); RepositoryAccessProvider rap = (RepositoryAccessProvider) component; //Create a CrawlControllerImpl, as some connectors depends on it StandaloneCrawlController crawlCtrlImpl = new StandaloneCrawlController(rap); //Load the content-source.xml configuration into an AspireObject AspireObject scanProps = new AspireObject("doc"); AspireObject scanPropsFile = AspireObject.createFromXML(new File(scanPropertiesFile)); scanProps.add(scanPropsFile); //Create and initialize a new SourceInfo from the RAP SourceInfo info = rap.newSourceInfo(scanPropsFile); info.initialize(scanProps); info.setCrawlController(crawlCtrlImpl); crawlCtrlImpl.setSourceInfo(info); |
Step 3
...
Start the crawl:
Code Block | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|
| ||||||||||
//Execute the crawl, passing downloadStream as the processor for the crawled items
crawl(rap, info, Main::downloadStream); |
The crawl() method:
Code Block | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|
| ||||||||||
/**
 * Runs a crawl: seeds it from the crawl root and then keeps processing the
 * local queue until it reports empty.
 *
 * @param rap       the repository access provider used to process the crawl root
 * @param info      the source info describing the crawl
 * @param processor callback handed to the scanner when it processes its queue
 * @throws AspireException declared by the original snippet and propagated to the caller
 */
private static void crawl(RepositoryAccessProvider rap, SourceInfo info, Consumer<SourceItem> processor) throws AspireException {
    // The scan listener: it maintains the local process queue and listens for new items to crawl
    Scanner queueScanner = new Scanner(rap, info);

    // Seed the crawl by letting the RAP process a synthetic root item
    SourceItem rootItem = new SourceItem("crawlRoot");
    rap.processCrawlRoot(rootItem, info, queueScanner);

    // Always process the queue at least once, then repeat while items remain;
    // an empty queue means the crawl has finished
    queueScanner.processQueue(processor);
    while (!queueScanner.isQueueEmpty()) {
        queueScanner.processQueue(processor);
    }
}
|
The downloadStream() method:
Code Block | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|
| ||||||||||
/**
 * Prints the item's name and, if the item has a content stream, copies that
 * stream into a file named after the item under the "output/" directory.
 *
 * Any exception is caught and its stack trace printed, so a failure on one
 * item does not propagate to the caller.
 *
 * @param item the crawled item whose content should be written to disk
 */
private static void downloadStream(SourceItem item) {
    System.out.println("Item: " + item.getName());
    // try-with-resources closes both streams even when opening the file or copying fails;
    // a null content stream is allowed here and is simply not closed
    try (InputStream is = item.getContentStream()) {
        if (is != null) {
            // The inner resource is closed first, so the output file is closed before the input stream
            try (FileOutputStream fos = new FileOutputStream(new File("output/" + item.getName()))) {
                copyStream(is, fos);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
|
For standalone crawls with legacy connectors, see:
...
Overview
Content Tools