IC211 Spring AY 2020

The following code is a simple program for recognizing file types. What's interesting about it is that it is extensible. You can basically make "plug-ins" to add more and more file types that it recognizes. The basic program is:

HW10.java
/**
 * Driver program: registers the available recognizers with a
 * Categorizer and prints the categories matched by standard input.
 */
public class HW10 {
  public static void main(String[] args) {
    // Recognizers are registered in this order: ASCII first, then JPG.
    RecogASCII[] recognizers = { new RecogASCII(), new RecogJPG() };

    Categorizer categorizer = new Categorizer();
    for (RecogASCII recognizer : recognizers) {
      categorizer.add(recognizer);
    }

    categorizer.printCategories(System.in);
  }
}



RecogASCII.java
/**
 * Recognizer for ASCII data.  Bytes are supplied one at a time
 * through feed(), and decision() reports the current verdict.
 */
public class RecogASCII {
  // Recognizer state: 2 = unknown, 1 = match, 0 = not a match.
  private int state = 2;

  /**
   * Returns the name of the category this recognizer tests for.
   */
  public String getName() {
    return "ASCII";
  }

  /**
   * Processes the next byte of the file.  Any value outside
   * the range 0..127 moves the recognizer to state 0
   * (not a match).
   */
  public void feed(int nextByte) {
    boolean inAsciiRange = (0 <= nextByte) && (nextByte <= 127);

    if (!inAsciiRange) {
      setState(0);
    }
  }

  /**
   * Returns true if the file (as processed so far) counts as an
   * ASCII file, i.e. the state is anything other than 0.
   */
  boolean decision() {
    boolean ruledOut = (state == 0);

    return !ruledOut;
  }

  /**
   * Returns the current state of the recognizer.  The states
   * are 0, 1 and 2: 2 means the status of the file is unknown,
   * 0 means definitely not a match (e.g. not an ASCII file),
   * and 1 means definitely a match.
   */
  public int getState() {
    return this.state;
  }

  /**
   * Sets the recognizer's state (see getState for the meaning
   * of each value).
   */
  public void setState(int v) {
    this.state = v;
  }
}
Categorizer.java
import java.io.*;
/**
 * Runs an input stream through a set of registered recognizers
 * and prints the name of every category that matches.
 */
public class Categorizer {
  // Registered recognizers, kept in the order they were added.
  private final Queue recognizers = new Queue();

  /**
   * Adds a RecogASCII object to the Categorizer, so that when
   * an input file is processed, it is one of the categories
   * that gets tested for.
   *
   * @param r the recognizer to register
   */
  public void add(RecogASCII r) {
    recognizers.enqueue(r);
  }

  /**
   * Takes an input stream (assumed to be a file) and
   * runs it through all the RecogASCII's that have been
   * added, and prints any matches discovered.
   *
   * The stream is read one byte at a time until end of stream;
   * each byte is fed to every recognizer in registration order.
   * Afterwards the name of each recognizer whose decision() is
   * true is printed on its own line.  If anything goes wrong,
   * the failure is reported on standard error and the program
   * exits with status 1.
   *
   * @param is the stream to categorize; it is not closed here
   */
  public void printCategories(InputStream is) {
    try {
      int n;

      while ((n = is.read()) != -1) {
        for (Queue.Iter i = recognizers.iterator(); i.hasNext();) {
          i.next().feed(n);
        }
      }

      for (Queue.Iter i = recognizers.iterator(); i.hasNext();) {
        RecogASCII r = i.next();

        if (r.decision()) {
          System.out.println(r.getName());
        }
      }
    } catch (Exception e) {
      // Keep the original exit status, but say why we are exiting
      // instead of terminating silently.
      System.err.println("Categorizer: unable to categorize input: " + e);
      System.exit(1);
    }
  }

  /**
   * Minimal singly linked FIFO queue of recognizers.
   */
  private static class Queue {
    private Node head = null, tail = null;

    /**
     * Appends s to the back of the queue.
     */
    public void enqueue(RecogASCII s) {
      Node added = new Node(s, null);

      if (head == null) {
        head = tail = added;
      } else {
        tail.next = added;
        tail      = added;
      }
    }

    /**
     * Removes and returns the element at the front of the queue.
     *
     * @throws java.util.NoSuchElementException if the queue is empty
     */
    public RecogASCII dequeue() {
      if (head == null) {
        throw new java.util.NoSuchElementException("dequeue on empty queue");
      }
      Node t = head;

      head = head.next;

      if (head == null) {
        tail = null; // queue became empty, so there is no last node
      }
      return t.data;
    }

    /**
     * Returns true if the queue holds no elements.
     */
    public boolean empty() {
      return head == null;
    }

    /**
     * Returns an iterator positioned at the front of the queue.
     */
    public Iter iterator() {
      return new Iter(head);
    }

    /**
     * Forward-only cursor over the queue's nodes.
     */
    protected static class Iter {
      private Node curr;

      public Iter(Node start) {
        curr = start;
      }

      /**
       * Returns true if next() has another element to return.
       */
      public boolean hasNext() {
        return curr != null;
      }

      /**
       * Returns the current element and advances the cursor.
       *
       * @throws java.util.NoSuchElementException if no elements remain
       */
      public RecogASCII next() {
        if (curr == null) {
          throw new java.util.NoSuchElementException("iterator exhausted");
        }
        RecogASCII s = curr.data;

        curr = curr.next;
        return s;
      }
    }

    /**
     * One link in the queue: an element plus the following node.
     */
    private static class Node {
      public RecogASCII data;
      public Node next;

      public Node(RecogASCII d,
                  Node       n) {
        data = d;
        next = n;
      }
    }
  }
}

However, HW10 makes use of a second "recognizer" that recognizes JPG files. It just extends the basic RecogASCII and modifies its functionality a bit:


RecogJPG.java

/**
 * Recognizer for JPG files.  The JPG file header is
 * FF D8 FF E0 xx xx 4A 46 49 46 00, and this recognizer
 * checks whether the first 11 bytes fed to it match
 * that header.
 */
public class RecogJPG extends RecogASCII {
  // NOTE: 0x introduces a hex integer literal, so 0xFF is the int 255.
  // A negative entry marks a position where any byte is accepted.
  private static int[] header =
  {
    0xFF, 0xD8, 0xFF, 0xE0, // offsets 0-3
    -1,   -1,               // offsets 4-5
    0x4A, 0x46, 0x49, 0x46, // offsets 6-9
    0x00                    // offset 10
  };

  // Count of bytes fed so far; also the header offset of the next byte.
  private int bytesSeen = 0;

  /**
   * Returns the name of the category this recognizer tests for.
   */
  public String getName() {
    return "JPG";
  }

  /**
   * Processes the next byte of the file.  While the state is
   * still unknown (2), the byte is compared against the header
   * entry at the current offset; a mismatch moves the state to
   * 0 (not a match).  Once the whole header has been consumed
   * without the state being 0, the state becomes 1 (match).
   */
  public void feed(int nextByte) {
    boolean undecided = (getState() == 2);

    if (undecided) {
      int expected = header[bytesSeen];
      boolean mustMatch = (expected >= 0);

      if (mustMatch && (expected != nextByte)) {
        setState(0);
      }
    }
    bytesSeen += 1;

    boolean headerConsumed = (bytesSeen >= header.length);

    if (headerConsumed && (getState() != 0)) {
      setState(1);
    }
  }

  /**
   * Returns true only when the state is 1 (definitely a match).
   */
  boolean decision() {
    int current = getState();

    return current == 1;
  }
}

Take your favorite jpg file or ascii file (suppose it is called "foo") and run the analyzer like this:
cat foo | java HW10
... and if it's an ASCII file or JPG, the program will identify that fact.