Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Detailed information on configuring the Publish to Avro File App Bundle.

Avro schema support

The Publish to Avro component supports only Avro schema fields of primitive types (optionally declared as nullable, i.e. as a union with "null", as in the example below). Other complex data types are not supported. A typical schema looks like this:

Code Block
{
  "type": "record",
  "name": "TestAvroEvent",
  "namespace": "cz.incad",
  "fields": [
    {"name": "filepath", "type": ["null","string"]},
    {"name": "filemodificationtime", "type": ["null","long"]},
    {"name": "contenttext", "type": ["null","string"]}
  ]
}

When documents are published, every field named in the schema is populated automatically if a top-level Aspire document field with the same name exists. To publish a nested field such as /x/y/z, use a Groovy script like the one below to copy it to a top-level field (named as in the schema); it will then be published into the Avro file:

Code Block
<script>
  <![CDATA[
    import com.searchtechnologies.aspire.services.AspireObject;
    import java.text.SimpleDateFormat;
    import java.text.ParseException;

    // Copies selected Aspire document fields to top-level fields named after the
    // Avro schema fields, so that they are published into the Avro file.

    // Source Aspire field name -> Avro schema field name.
    Map<String, String> avroFieldsMap = new HashMap<String, String>();
    avroFieldsMap.put("url", "filepath");
    avroFieldsMap.put("modificationDate", "filemodificationtime");
    avroFieldsMap.put("content", "contenttext");

    // The quoted 'Z' in the pattern is matched as a literal character and carries no
    // zone information, so the formatter is pinned to UTC explicitly. Without this the
    // timestamp would be interpreted in the JVM's default time zone and the resulting
    // epoch milliseconds would be shifted by the local UTC offset.
    // NOTE(review): assumes modificationDate values are UTC, as the trailing 'Z'
    // suggests -- confirm against the connector output.
    SimpleDateFormat modificationDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
    modificationDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));

    // url -> filepath (the original url field is kept on the document).
    if (doc.url != null && doc.url.text() != null && !doc.url.text().trim().isEmpty()) {
      if (avroFieldsMap.get("url") != null) {
        doc.add(avroFieldsMap.get("url"), doc.url.text());
      }
    }

    // content -> contenttext; the original content field is removed after copying.
    if (doc.content != null && doc.content.text() != null && !doc.content.text().trim().isEmpty()) {
      if (avroFieldsMap.get("content") != null) {
        doc.add(avroFieldsMap.get("content"), doc.content.text());
        doc.removeChildren("content");
      }
    }

    // Children of every <extension> element are matched by their "name" attribute
    // against the mapping above; unmapped or blank fields are skipped.
    if (doc.extension != null) {
      List<AspireObject> extensions = doc.getAll("extension");
      for (AspireObject extension : extensions) {
        List<AspireObject> extensionFields = extension.getChildren();
        for (AspireObject aspireObj : extensionFields) {
          String fieldName = aspireObj.getAttribute("name");
          if (fieldName != null && !fieldName.trim().isEmpty() && avroFieldsMap.get(fieldName) != null) {
            String fieldValue = aspireObj.getText();
            if (fieldValue != null && !fieldValue.trim().isEmpty()) {
              if (fieldName.equals("modificationDate")) {
                // The schema declares filemodificationtime as a long, so the
                // timestamp text is converted to epoch milliseconds.
                try {
                  long dateMillis = modificationDateFormat.parse(fieldValue).getTime();
                  doc.add(avroFieldsMap.get(fieldName), dateMillis);
                } catch (ParseException pe) {
                  // An unparseable date is reported and the field is left unset;
                  // processing of the remaining fields continues.
                  component.error(pe.toString());
                }
              } else {
                doc.add(avroFieldsMap.get(fieldName), fieldValue);
              }
            }
          }
        }
      }
    }

  ]]>
</script>