1 package net.sf.bacchus.spring;
2
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.InputStreamReader;
6 import java.io.Reader;
7 import java.io.StringReader;
8 import java.nio.charset.Charset;
9 import java.nio.charset.CharsetDecoder;
10 import java.nio.charset.IllegalCharsetNameException;
11 import java.nio.charset.UnsupportedCharsetException;
12
13 import net.sf.bacchus.Raw;
14 import net.sf.bacchus.Record;
15 import net.sf.bacchus.charset.X_US_ASCII_ACH;
16
17 import org.springframework.batch.item.ReaderNotOpenException;
18 import org.springframework.batch.item.file.ResourceAwareItemReaderItemStream;
19 import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader;
20 import org.springframework.core.io.Resource;
21
22 /**
23 * A restartable {@link org.springframework.batch.item.ItemReader} for
24 * {@link Raw} ACH records.
25 * <p>
26 * This class is *not* thread safe.
27 * </p>
28 */
29 public class RecordReader extends AbstractItemCountingItemStreamItemReader<Raw> implements
30 ResourceAwareItemReaderItemStream<Raw> {
31
32 /** input buffer. */
33 private final char[] buffer = new char[Record.ACH_RECORD_SIZE + 10];
34
35 /** the name of the character set to decode from the resource. */
36 private String encoding;
37
38 /** the encoding {@link java.nio.charset.CharsetDecoder#replacement()}. */
39 private String replacement;
40
41 /** the input source. */
42 private Resource resource;
43
44 /** the reader for the {@link #resource}. */
45 private Reader reader;
46
47 /** whether to tolerate end of record separator characters. */
48 private boolean lenient;
49
50 /** Creates an item reader for {@link Raw} ACH records. */
51 public RecordReader() {
52 this.reader = null;
53 this.replacement = null;
54 }
55
56 /**
57 * {@inheritDoc}
58 * @param resource {@inheritDoc}
59 */
60 public void setResource(final Resource resource) {
61 this.resource = resource;
62 this.reader = null;
63 this.replacement = "";
64 setName(getClass().getName());
65 }
66
67 /**
68 * Sets the character set to use for decoding the resource. If no character
69 * encoding is set explicitly, uses {@link X_US_ASCII_ACH} and falls back to
70 * {@code US-ASCII} if that is not available.
71 * @param encoding the name of the character set.
72 */
73 public void setEncoding(final String encoding) {
74 this.encoding = encoding;
75 }
76
77 /**
78 * Sets whether to tolerate end of record separator characters including
79 * anything defined by {@link Character#isWhitespace(int)} or
80 * {@link java.nio.charset.CharsetDecoder#replacement()}. Raw ACH files
81 * should not have them, but test and sample files may have them added for
82 * readability.
83 * @param lenient whether to tolerate end of record separator characters.
84 */
85 public void setLenientSeparator(final boolean lenient) {
86 this.lenient = lenient;
87 }
88
89 /**
90 * Opens a {@link BufferedReader} around the record resource.
91 * @see #setResource(Resource)
92 * @see #setEncoding(String)
93 * @throws IOException if the resource fails to get an input stream.
94 */
95 @Override
96 protected void doOpen() throws IOException {
97 if (this.resource != null && this.resource.exists()) {
98 final CharsetDecoder decoder = getCharset().newDecoder();
99 this.replacement = decoder.replacement();
100 this.reader = new InputStreamReader(this.resource.getInputStream(), decoder);
101 } else {
102 this.reader = new StringReader("");
103 }
104 }
105
106 /**
107 * Closes the reader.
108 * @throws IOException if the reader cannot be closed.
109 */
110 @Override
111 protected void doClose() throws IOException {
112 if (this.reader != null) {
113 this.reader.close();
114 }
115 }
116
117 /**
118 * Reads a raw record from the resource.
119 * @throws ReaderNotOpenException if the reader is not open.
120 * @throws IOException if there is an error with the underlying
121 * {@link Reader}.
122 */
123 @Override
124 protected Raw doRead() throws ReaderNotOpenException, IOException {
125 if (this.reader == null) {
126 throw new ReaderNotOpenException("Reader must be open before it can be read.");
127 }
128
129 final int start = this.lenient ? skipSeparators() : 0;
130 final int size = start + this.reader.read(this.buffer, start, Record.ACH_RECORD_SIZE - start);
131 if (size == -1) {
132 return null;
133 } else {
134 return new Raw(String.copyValueOf(this.buffer, 0, size));
135 }
136 }
137
138 /**
139 * Reads through any record separators, leaving the first non-separator in
140 * {@link #buffer}. Record separators include any
141 * {@link Character#isWhitespace(int) whitespace} and the
142 * {@link CharsetDecoder#replacement()} used by the charset decoder. If the
143 * end of input is reached, returns {@code 0}.
144 * @return the number of characters that were read into {@link #buffer}.
145 */
146 private int skipSeparators() throws IOException {
147 do {
148 final int codePoint = this.reader.read();
149 if (codePoint == -1) {
150 return 0;
151 } else if (Character.isWhitespace(codePoint)) {
152 continue;
153 } else {
154 final char[] first = Character.toChars(codePoint);
155 if (this.replacement.equals(new String(first))) {
156 // If the replacement were multi-character for a reason
157 // other than being a supplementary character, more code
158 // points would have to be read from the input to confirm a
159 // match. Since ACH records are supposed to be in the 7-bit
160 // range rather than > 16, this should never really happen.
161 continue;
162 } else {
163 System.arraycopy(first, 0, this.buffer, 0, first.length);
164 return first.length;
165 }
166 }
167 } while (true);
168 }
169
170 /**
171 * Selects a character set to decode the input resource. The search order
172 * is:
173 * <ol>
174 * <li>The encoding specified by {@link #setEncoding(String)}</li>
175 * <li>The specialized character set {@link X_US_ASCII_ACH}</li>
176 * <li>The built-in character set {@code US-ASCII}</li>
177 * <li>The default from {@link Charset#defaultCharset()}</li>
178 * </ol>
179 * @return
180 */
181 private Charset getCharset() {
182 for (final String charset : new String[] { this.encoding, X_US_ASCII_ACH.NAME, "US-ASCII" }) {
183 if (charset == null) {
184 continue;
185 } else {
186 try {
187 return Charset.forName(charset);
188 }
189 catch (final IllegalCharsetNameException e) {
190 continue;
191 }
192 catch (final UnsupportedCharsetException e) {
193 continue;
194 }
195 }
196
197 }
198 // Should never get here because US-ASCII is always supported.
199 return Charset.defaultCharset();
200 }
201
202 }