View Javadoc

1   package net.sf.bacchus.spring;
2   
3   import java.io.BufferedReader;
4   import java.io.IOException;
5   import java.io.InputStreamReader;
6   import java.io.Reader;
7   import java.io.StringReader;
8   import java.nio.charset.Charset;
9   import java.nio.charset.CharsetDecoder;
10  import java.nio.charset.IllegalCharsetNameException;
11  import java.nio.charset.UnsupportedCharsetException;
12  
13  import net.sf.bacchus.Raw;
14  import net.sf.bacchus.Record;
15  import net.sf.bacchus.charset.X_US_ASCII_ACH;
16  
17  import org.springframework.batch.item.ReaderNotOpenException;
18  import org.springframework.batch.item.file.ResourceAwareItemReaderItemStream;
19  import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader;
20  import org.springframework.core.io.Resource;
21  
22  /**
23   * A restartable {@link org.springframework.batch.item.ItemReader} for
24   * {@link Raw} ACH records.
25   * <p>
26   * This class is *not* thread safe.
27   * </p>
28   */
29  public class RecordReader extends AbstractItemCountingItemStreamItemReader<Raw> implements
30          ResourceAwareItemReaderItemStream<Raw> {
31  
32      /** input buffer. */
33      private final char[] buffer = new char[Record.ACH_RECORD_SIZE + 10];
34  
35      /** the name of the character set to decode from the resource. */
36      private String encoding;
37  
38      /** the encoding {@link java.nio.charset.CharsetDecoder#replacement()}. */
39      private String replacement;
40  
41      /** the input source. */
42      private Resource resource;
43  
44      /** the reader for the {@link #resource}. */
45      private Reader reader;
46  
47      /** whether to tolerate end of record separator characters. */
48      private boolean lenient;
49  
50      /** Creates an item reader for {@link Raw} ACH records. */
51      public RecordReader() {
52          this.reader = null;
53          this.replacement = null;
54      }
55  
56      /**
57       * {@inheritDoc}
58       * @param resource {@inheritDoc}
59       */
60      public void setResource(final Resource resource) {
61          this.resource = resource;
62          this.reader = null;
63          this.replacement = "";
64          setName(getClass().getName());
65      }
66  
67      /**
68       * Sets the character set to use for decoding the resource. If no character
69       * encoding is set explicitly, uses {@link X_US_ASCII_ACH} and falls back to
70       * {@code US-ASCII} if that is not available.
71       * @param encoding the name of the character set.
72       */
73      public void setEncoding(final String encoding) {
74          this.encoding = encoding;
75      }
76  
77      /**
78       * Sets whether to tolerate end of record separator characters including
79       * anything defined by {@link Character#isWhitespace(int)} or
80       * {@link java.nio.charset.CharsetDecoder#replacement()}. Raw ACH files
81       * should not have them, but test and sample files may have them added for
82       * readability.
83       * @param lenient whether to tolerate end of record separator characters.
84       */
85      public void setLenientSeparator(final boolean lenient) {
86          this.lenient = lenient;
87      }
88  
89      /**
90       * Opens a {@link BufferedReader} around the record resource.
91       * @see #setResource(Resource)
92       * @see #setEncoding(String)
93       * @throws IOException if the resource fails to get an input stream.
94       */
95      @Override
96      protected void doOpen() throws IOException {
97          if (this.resource != null && this.resource.exists()) {
98              final CharsetDecoder decoder = getCharset().newDecoder();
99              this.replacement = decoder.replacement();
100             this.reader = new InputStreamReader(this.resource.getInputStream(), decoder);
101         } else {
102             this.reader = new StringReader("");
103         }
104     }
105 
106     /**
107      * Closes the reader.
108      * @throws IOException if the reader cannot be closed.
109      */
110     @Override
111     protected void doClose() throws IOException {
112         if (this.reader != null) {
113             this.reader.close();
114         }
115     }
116 
117     /**
118      * Reads a raw record from the resource.
119      * @throws ReaderNotOpenException if the reader is not open.
120      * @throws IOException if there is an error with the underlying
121      *         {@link Reader}.
122      */
123     @Override
124     protected Raw doRead() throws ReaderNotOpenException, IOException {
125         if (this.reader == null) {
126             throw new ReaderNotOpenException("Reader must be open before it can be read.");
127         }
128 
129         final int start = this.lenient ? skipSeparators() : 0;
130         final int size = start + this.reader.read(this.buffer, start, Record.ACH_RECORD_SIZE - start);
131         if (size == -1) {
132             return null;
133         } else {
134             return new Raw(String.copyValueOf(this.buffer, 0, size));
135         }
136     }
137 
138     /**
139      * Reads through any record separators, leaving the first non-separator in
140      * {@link #buffer}. Record separators include any
141      * {@link Character#isWhitespace(int) whitespace} and the
142      * {@link CharsetDecoder#replacement()} used by the charset decoder. If the
143      * end of input is reached, returns {@code 0}.
144      * @return the number of characters that were read into {@link #buffer}.
145      */
146     private int skipSeparators() throws IOException {
147         do {
148             final int codePoint = this.reader.read();
149             if (codePoint == -1) {
150                 return 0;
151             } else if (Character.isWhitespace(codePoint)) {
152                 continue;
153             } else {
154                 final char[] first = Character.toChars(codePoint);
155                 if (this.replacement.equals(new String(first))) {
156                     // If the replacement were multi-character for a reason
157                     // other than being a supplementary character, more code
158                     // points would have to be read from the input to confirm a
159                     // match. Since ACH records are supposed to be in the 7-bit
160                     // range rather than > 16, this should never really happen.
161                     continue;
162                 } else {
163                     System.arraycopy(first, 0, this.buffer, 0, first.length);
164                     return first.length;
165                 }
166             }
167         } while (true);
168     }
169 
170     /**
171      * Selects a character set to decode the input resource. The search order
172      * is:
173      * <ol>
174      * <li>The encoding specified by {@link #setEncoding(String)}</li>
175      * <li>The specialized character set {@link X_US_ASCII_ACH}</li>
176      * <li>The built-in character set {@code US-ASCII}</li>
177      * <li>The default from {@link Charset#defaultCharset()}</li>
178      * </ol>
179      * @return
180      */
181     private Charset getCharset() {
182         for (final String charset : new String[] { this.encoding, X_US_ASCII_ACH.NAME, "US-ASCII" }) {
183             if (charset == null) {
184                 continue;
185             } else {
186                 try {
187                     return Charset.forName(charset);
188                 }
189                 catch (final IllegalCharsetNameException e) {
190                     continue;
191                 }
192                 catch (final UnsupportedCharsetException e) {
193                     continue;
194                 }
195             }
196 
197         }
198         // Should never get here because US-ASCII is always supported.
199         return Charset.defaultCharset();
200     }
201 
202 }