1 package net.sf.bacchus.charset;
2
3 import java.nio.ByteBuffer;
4 import java.nio.charset.Charset;
5 import java.nio.charset.CharsetEncoder;
6
7 /**
8 * {@link java.nio.charset.Charset} that decodes only the characters that are
9 * allowed in an ACH entry and attempts to transpose other characters to 1-byte
10 * equivalents when encoding to ACH. The transpositions replace characters found
11 * within the {@link java.lang.Character.UnicodeBlock#LATIN_1_SUPPLEMENT} and
12 * {@link java.lang.Character.UnicodeBlock#LATIN_EXTENDED_A} ranges with
13 * reasonable equivalents within the 7-bit ASCII range that is allowed within an
14 * ACH record. In most cases, the replacement is simply the unaccented version
15 * of the same character.
16 */
17 public class X_ACH_XP extends X_US_ASCII_ACH {
18
19 /** the name of this character set. */
20 public static final String NAME = "X-ACH-X";
21
22 /** marker for unmappable characters. */
23 private static final byte NONE = 0x00;
24
25 /**
26 * transposition equivalents for the Unicode C1 Control Characters and
27 * Latin-1 Supplement starting at 0x0080.
28 */
29 private static final byte[] LATIN_1_SUPPLEMENT = new byte[] { NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
30 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
31 NONE, NONE, NONE, NONE, NONE, NONE, '!', NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, '"', NONE,
32 NONE, NONE, NONE, NONE, NONE, '2', '3', NONE, NONE, NONE, NONE, NONE, NONE, NONE, '"', NONE, NONE, NONE,
33 '?', 'A', 'A', 'A', 'A', 'A', 'A', NONE, 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I', 'D', 'N', 'O', 'O',
34 'O', 'O', 'O', NONE, 'O', 'U', 'U', 'U', 'U', 'Y', NONE, NONE, 'a', 'a', 'a', 'a', 'a', 'a', NONE, 'c',
35 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', NONE, 'n', 'o', 'o', 'o', 'o', 'o', '/', 'o', 'u', 'u', 'u', 'u',
36 'y', NONE, 'y' };
37
38 /**
39 * transposition equivalents for the Unicode Latin Extended-A characters
40 * starting at 0x0100.
41 */
42 private static final byte[] LATIN_EXTENDED_A = new byte[] { 'A', 'a', 'A', 'a', 'A', 'a', 'C', 'c', 'C', 'c', 'C',
43 'c', 'C', 'c', 'D', 'd', 'D', 'd', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'G', 'g', 'G', 'g',
44 'G', 'g', 'G', 'g', 'H', 'h', 'H', 'h', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i', NONE, NONE, 'J',
45 'j', 'K', 'k', 'k', 'L', 'l', 'L', 'l', 'L', 'l', 'L', 'l', 'L', 'l', 'N', 'n', 'N', 'n', 'N', 'n', 'n',
46 'N', 'n', 'O', 'o', 'O', 'o', 'O', 'o', NONE, NONE, 'R', 'r', 'R', 'r', 'R', 'r', 'S', 's', 'S', 's', 'S',
47 's', 'S', 's', 'T', 't', 'T', 't', 'T', 't', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u',
48 'W', 'w', 'Y', 'y', 'Y', 'Z', 'z', 'Z', 'z', 'z', 'z', 's' };
49
50 /**
51 * Encoder that attempts to map out-of-range characters to equivalent
52 * characters that can be represented using single-byte representations
53 * within the characters valid in an ACH record. Accented characters are
54 * mapped to their non-accented form.
55 */
56 protected static class Transposer extends Encoder {
57
58 /**
59 * Constructs an encoder for {@code X-ACH-X}.
60 * @param cs the character set this decodes.
61 */
62 public Transposer(final X_ACH_XP cs) {
63 this(cs, 1);
64 }
65
66 /**
67 * Constructs an encoder for {@code X-ACH-X}.
68 * @param cs the character set this decodes.
69 * @param maxBytesPerChar the maximum number of bytes for a single
70 * character.
71 */
72 protected Transposer(final Charset cs, final float maxBytesPerChar) {
73 super(cs, maxBytesPerChar);
74 }
75
76 /**
77 * Maps characters to their single-byte equivalents.
78 * @param c {@inheritDoc}
79 * @param out {@inheritDoc}
80 * @return {@inheritDoc}
81 */
82 @Override
83 protected boolean map(final char c, final ByteBuffer out) {
84 final byte equivalent;
85 if (c > 127 && c < 256) {
86 equivalent = LATIN_1_SUPPLEMENT[c - 128];
87 } else if (c >= 256 && c < 384) {
88 equivalent = LATIN_EXTENDED_A[c - 256];
89 } else {
90 equivalent = NONE;
91 }
92
93 if (equivalent == NONE) {
94 return false;
95 } else {
96 out.put(equivalent);
97 return true;
98 }
99 }
100 }
101
102 /**
103 * Initializes a new {@code X-ACH-X} {@link Charset}.
104 */
105 public X_ACH_XP() {
106 this(NAME);
107 }
108
109 /**
110 * Initializes a new ACH {@link Charset} with the given name and no aliases.
111 * @param name the name of the character set.
112 */
113 protected X_ACH_XP(final String name) {
114 super(name);
115 }
116
117 /**
118 * This is a very limited {@link Charset} that is assumed only to contain
119 * itself and those character sets contained by {@link X_US_ASCII_ACH}.
120 * @param cs {@inheritDoc}
121 * @return {@inheritDoc}
122 * @see X_US_ASCII_ACH#contains(Charset)
123 */
124 @Override
125 public boolean contains(final Charset cs) {
126 return NAME.equals(cs.name()) || super.contains(cs);
127 }
128
129 /**
130 * Delegates to {@link X_US_ASCII_ACH#newEncoder()}, which is a temporary
131 * implementation.
132 * @return {@inheritDoc}
133 * @see X_US_ASCII_ACH#newEncoder()
134 */
135 @Override
136 public CharsetEncoder newEncoder() {
137 return new Transposer(this);
138 }
139
140 }