Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
languagejava
firstline93
titledownloadStream method
linenumberstrue
collapsetrue
  /**
   * Writes the content stream of the given item to a file under the
   * {@code output/} directory, named after the item.
   * <p>
   * Items without a content stream are skipped. Any failure is printed to
   * standard error and does not propagate to the caller.
   *
   * @param item the item whose content stream should be saved
   */
  private static void downloadStream(SourceItem item) {
    System.out.println("Item: "+item.getName());
    // try-with-resources guarantees both streams are closed even when the
    // copy (or opening the output file) fails; a null resource is ignored.
    try (InputStream is = item.getContentStream()) {
      if (is != null) {
        try (FileOutputStream fos = new FileOutputStream(new File("output/"+item.getName()))) {
          copyStream(is, fos);
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }


Step 4

...

The Scanner class:

Code Block
languagejava
firstline179
titleScanner class
linenumberstrue
collapsetrue
  /**
   * Scan listener that collects the items reported during a scan and then
   * processes them in batches: each batch is populated, fetched, scanned
   * (when the item is a container) and handed to a caller-supplied consumer.
   */
  static class Scanner implements ScanListener {

    /**
     * Receives every new item reported through {@link #addItem} and
     * {@link #addItems}.
     */
    ArrayList<SourceItem> queue = new ArrayList<>();

    /**
     * Snapshot of {@link #queue} that is iterated by {@link #processQueue},
     * so that items added while processing do not disturb the iteration.
     */
    ArrayList<SourceItem> safeQueue = new ArrayList<>();

    private RepositoryAccessProvider rap;
    private SourceInfo info;
    private FetchURL fetcher;

    /**
     * Creates a scanner bound to the given access provider and source, and
     * initializes the fetcher used to retrieve item content.
     *
     * @param rap  the repository access provider used to inspect, populate and scan items
     * @param info the source information passed to the provider and to each job
     * @throws AspireException if the scanner cannot be set up
     */
    public Scanner(RepositoryAccessProvider rap, SourceInfo info) throws AspireException {
      this.rap = rap;
      this.info = info;
      this.fetcher = new StandaloneFetchURL(rap);
      this.fetcher.initialize(emptyDom);
    }

    /**
     * Closes the underlying fetcher.
     */
    public void close() {
      fetcher.close();
    }

    /**
     * Enqueues a single item reported by the scan.
     *
     * @param item the discovered item
     */
    @Override
    public void addItem(SourceItem item) {
      queue.add(item);
    }

    /**
     * Enqueues a group of items reported by the scan.
     *
     * @param items the discovered items
     */
    @Override
    public void addItems(List<SourceItem> items) {
      queue.addAll(items);
    }

    /**
     * Processes every item currently waiting in the queue.
     * <p>
     * The pending items are first moved into {@link #safeQueue}; scanning a
     * container passes this listener to the provider, so newly reported items
     * land in {@link #queue} and are picked up by the next call.
     *
     * @param processor callback invoked once for each processed item
     * @throws AspireException if processing an item fails
     */
    public void processQueue(Consumer<SourceItem> processor) throws AspireException {
      RepositoryConnection connection = rap.newConnection(info);

      // Take a snapshot of the pending items and empty the live queue.
      safeQueue.clear();
      safeQueue.addAll(queue);
      queue.clear();

      for (SourceItem current : safeQueue) {
        final boolean isContainer = rap.isContainer(current, connection);

        // Containers are only populated/fetched when the source asks for it.
        if (info.indexContainers() || !isContainer) {
          populateAndFetch(current, connection);
        }

        if (isContainer) {
          rap.scan(current, info, connection, this);
        }

        processor.accept(current);
      }

      safeQueue.clear();
    }

    /**
     * Populates the item, runs it through the fetcher as a job and stores the
     * resulting content stream on the item.
     */
    private void populateAndFetch(SourceItem item, RepositoryConnection connection)
        throws AspireException {
      rap.populate(item, info, connection);

      Job fetchJob = JobFactory.newInstance(item.generateJobDocument());
      fetchJob.put("sourceInfo", info);
      fetchJob.put("crawlController", info.getCrawlController());
      fetcher.process(fetchJob);

      InputStream content = (InputStream) fetchJob.get("contentStream");
      item.setContentStream(content);
    }

    /**
     * @return {@code true} when neither the live queue nor the snapshot
     *         queue holds any item
     */
    public boolean isQueueEmpty() {
      return queue.isEmpty() && safeQueue.isEmpty();
    }
  }



For standalone crawls with Legacy connectors, see:

...