Page History
...
Code Block | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|
| ||||||||||
/**
 * Writes the content stream of a crawled item to the file {@code output/<item name>}.
 *
 * Items whose content stream is {@code null} are skipped. Any failure while opening,
 * copying or closing the streams is printed via {@code printStackTrace()} and not
 * rethrown, so a single failing item never aborts the caller.
 *
 * NOTE(review): the item name is used verbatim as the file name; confirm it can never
 * contain path separators or ".." before using this outside of a sample.
 *
 * @param item the item whose name is printed and whose content stream is saved
 */
private static void downloadStream(SourceItem item) {
    System.out.println("Item: "+item.getName());
    // try-with-resources closes both streams even when the copy (or opening the
    // output file) throws; a null input stream is simply skipped by the construct.
    try (InputStream is = item.getContentStream()) {
        if (is != null) {
            try (FileOutputStream fos = new FileOutputStream(new File("output/"+item.getName()))) {
                copyStream(is, fos);
            }
        }
    } catch (Exception e) {
        // Best effort: report the problem and carry on with the next item.
        e.printStackTrace();
    }
} |
Step 4
...
The Scanner class:
Code Block | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|
| ||||||||||
static class Scanner implements ScanListener {
    /**
     * Incoming queue: every item handed to {@link #addItem} or {@link #addItems}
     * is appended here.
     */
    ArrayList<SourceItem> queue;
    /**
     * Working copy of {@link #queue} that {@link #processQueue} iterates over, so the
     * incoming queue can keep receiving items while a batch is being processed.
     */
    ArrayList<SourceItem> safeQueue;
    private RepositoryAccessProvider rap;
    private SourceInfo info;
    private FetchURL fetcher;

    /**
     * Creates a scanner with empty queues and an initialized standalone fetcher.
     *
     * @param rap  the repository access provider used to inspect, populate and scan items
     * @param info the source information passed to every provider call
     * @throws AspireException declared by this constructor; presumably raised by the
     *                         fetcher setup (confirm against the fetcher API)
     */
    public Scanner(RepositoryAccessProvider rap, SourceInfo info) throws AspireException {
        this.rap = rap;
        this.info = info;
        this.queue = new ArrayList<>();
        this.safeQueue = new ArrayList<>();
        this.fetcher = new StandaloneFetchURL(rap);
        this.fetcher.initialize(emptyDom);
    }

    /**
     * Closes the underlying fetcher.
     */
    public void close() {
        fetcher.close();
    }

    /**
     * Listener callback: enqueues a single item for a later {@link #processQueue} call.
     */
    @Override
    public void addItem(SourceItem item) {
        queue.add(item);
    }

    /**
     * Listener callback: enqueues a batch of items for a later {@link #processQueue} call.
     */
    @Override
    public void addItems(List<SourceItem> items) {
        queue.addAll(items);
    }

    /**
     * Processes everything currently waiting in the incoming queue.
     *
     * The pending items are first moved into {@link #safeQueue} and the incoming queue
     * is emptied. Each item is then populated and fetched (unless it is a container and
     * containers are not indexed), scanned if it is a container, and finally handed to
     * {@code processor}. Items added to the incoming queue meanwhile are left for the
     * next call.
     *
     * @param processor receives every item of the batch after it has been handled
     * @throws AspireException declared for the provider and fetcher calls made here
     */
    public void processQueue(Consumer<SourceItem> processor) throws AspireException {
        RepositoryConnection conn = rap.newConnection(info);

        // Take a snapshot of the pending items and reset the incoming queue.
        safeQueue.clear();
        safeQueue.addAll(queue);
        queue.clear();

        for (SourceItem current : safeQueue) {
            final boolean isContainer = rap.isContainer(current, conn);

            // Containers are only populated and fetched when the source asks for them to be indexed.
            if (info.indexContainers() || !isContainer) {
                populateAndFetch(current, conn);
            }

            // Scanning reports discovered items back through this listener.
            if (isContainer) {
                rap.scan(current, info, conn, this);
            }

            processor.accept(current);
        }

        safeQueue.clear();
    }

    /**
     * Populates the item, runs the fetcher on a job built from it and stores the
     * job's "contentStream" entry on the item.
     */
    private void populateAndFetch(SourceItem item, RepositoryConnection conn) throws AspireException {
        rap.populate(item, info, conn);

        Job job = JobFactory.newInstance(item.generateJobDocument());
        job.put("sourceInfo", info);
        job.put("crawlController", info.getCrawlController());
        fetcher.process(job);

        item.setContentStream((InputStream) job.get("contentStream"));
    }

    /**
     * @return {@code true} when neither the incoming queue nor the working copy holds any item
     */
    public boolean isQueueEmpty() {
        return queue.isEmpty() && safeQueue.isEmpty();
    }
}
|
For standalone crawls with Legacy connectors, see:
...
Overview
Content Tools