View Javadoc
1   /*
2    * This file is part of Waarp Project (named also Waarp or GG).
3    *
4    *  Copyright (c) 2019, Waarp SAS, and individual contributors by the @author
5    *  tags. See the COPYRIGHT.txt in the distribution for a full listing of
6    * individual contributors.
7    *
8    *  All Waarp Project is free software: you can redistribute it and/or
9    * modify it under the terms of the GNU General Public License as published by
10   * the Free Software Foundation, either version 3 of the License, or (at your
11   * option) any later version.
12   *
13   * Waarp is distributed in the hope that it will be useful, but WITHOUT ANY
14   * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15   * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16   *
17   *  You should have received a copy of the GNU General Public License along with
18   * Waarp . If not, see <http://www.gnu.org/licenses/>.
19   */
20  package org.waarp.common.transcode;
21  
22  import org.waarp.common.file.FileUtils;
23  import org.waarp.common.logging.SysErrLogger;
24  import org.waarp.common.logging.WaarpLogger;
25  import org.waarp.common.logging.WaarpLoggerFactory;
26  
27  import java.io.File;
28  import java.io.FileInputStream;
29  import java.io.FileNotFoundException;
30  import java.io.FileOutputStream;
31  import java.io.IOException;
32  import java.io.InputStreamReader;
33  import java.io.OutputStreamWriter;
34  import java.io.UnsupportedEncodingException;
35  import java.nio.charset.Charset;
36  import java.util.Map.Entry;
37  import java.util.Set;
38  import java.util.SortedMap;
39  
40  /**
41   * Helper to print in output the Charsets available in the JVM.<br>
42   * <br>
43   * -html will output HTML format<br>
44   * -text (default) will output TEXT format<br>
45   * -csv will output CSV (comma separated) format<br>
46   * <br>
47   * Allow also to transcode one file to another: all arguments mandatory<br>
48   * -from filename charset<br>
49   * -to filename charset<br>
50   */
51  public final class CharsetsUtil {
52    /**
53     * Internal Logger
54     */
55    private static final WaarpLogger logger =
56        WaarpLoggerFactory.getLogger(CharsetsUtil.class);
57  
58    private CharsetsUtil() {
59    }
60  
61    /**
62     * @param args
63     */
64    public static void main(final String[] args) {
65      int format = 1; // TEXT
66      boolean transcode = false;
67      String fromFilename = null;
68      String fromCharset = null;
69      String toFilename = null;
70      String toCharset = null;
71      if (args.length > 0) {
72        for (int i = 0; i < args.length; i++) {
73          if ("-html".equalsIgnoreCase(args[i])) {
74            format = 0;
75          } else if ("-text".equalsIgnoreCase(args[i])) {
76            format = 1;
77          } else if ("-csv".equalsIgnoreCase(args[i])) {
78            format = 2;
79          } else if ("-to".equalsIgnoreCase(args[i])) {
80            i++;
81            toFilename = args[i];
82            i++;
83            toCharset = args[i];
84          } else if ("-from".equalsIgnoreCase(args[i])) {
85            i++;
86            fromFilename = args[i];
87            i++;
88            fromCharset = args[i];
89          }
90        }
91        transcode =
92            toCharset != null && toFilename != null && fromCharset != null &&
93            fromFilename != null;
94      }
95      if (transcode) {
96        final boolean status =
97            transcode(fromFilename, fromCharset, toFilename, toCharset, 16384);
98        SysErrLogger.FAKE_LOGGER.sysout("Transcode: " + status);
99      } else {
100       printOutCharsetsAvailable(format);
101     }
102   }
103 
104   /**
105    * @param format 0 = html, 1 = text, 2 = csv
106    */
107   public static void printOutCharsetsAvailable(final int format) {
108     final SortedMap<String, Charset> map = Charset.availableCharsets();
109     final Set<Entry<String, Charset>> set = map.entrySet();
110     switch (format) {
111       case 0:
112         SysErrLogger.FAKE_LOGGER.sysout(
113             "<html><body><table border=1><tr><th>Name</th><th>CanEncode</th><th>IANA Registered</th><th>Aliases</th></tr>");
114         break;
115       case 1:
116         SysErrLogger.FAKE_LOGGER.sysout(
117             "Name\tCanEncode\tIANA Registered\tAliases");
118         break;
119       case 2:
120       default:
121         SysErrLogger.FAKE_LOGGER.sysout(
122             "Name,CanEncode,IANA Registered,Aliases");
123         break;
124     }
125     for (final Entry<String, Charset> entry : set) {
126       final Charset charset = entry.getValue();
127       final StringBuilder aliases;
128       switch (format) {
129         case 0:
130           aliases = new StringBuilder("<ul>");
131           break;
132         case 1:
133         case 2:
134         default:
135           aliases = new StringBuilder("[ ");
136           break;
137       }
138       final Set<String> aliasCharset = charset.aliases();
139       for (final String string : aliasCharset) {
140         switch (format) {
141           case 0:
142             aliases.append("<li>").append(string).append("</li>");
143             break;
144           case 1:
145           case 2:
146           default:
147             aliases.append(string).append(' ');
148             break;
149         }
150       }
151       switch (format) {
152         case 0:
153           aliases.append("</ul>");
154           break;
155         case 1:
156         case 2:
157         default:
158           aliases.append(']');
159           break;
160       }
161       switch (format) {
162         case 0:
163           SysErrLogger.FAKE_LOGGER.sysout(
164               "<tr><td>" + entry.getKey() + "</td><td>" + charset.canEncode() +
165               "</td><td>" + charset.isRegistered() + "</td><td>" + aliases +
166               "</td>");
167           break;
168         case 1:
169           SysErrLogger.FAKE_LOGGER.sysout(
170               entry.getKey() + '\t' + charset.canEncode() + '\t' +
171               charset.isRegistered() + '\t' + aliases);
172           break;
173         case 2:
174         default:
175           SysErrLogger.FAKE_LOGGER.sysout(
176               entry.getKey() + ',' + charset.canEncode() + ',' +
177               charset.isRegistered() + ',' + aliases);
178           break;
179       }
180     }
181     switch (format) {
182       case 0:
183         SysErrLogger.FAKE_LOGGER.sysout("</table></body></html>");
184         break;
185       case 1:
186       case 2:
187       default:
188         break;
189     }
190   }
191 
192   /**
193    * Method to transcode one file to another using 2 different charsets
194    *
195    * @param srcFilename
196    * @param fromCharset
197    * @param toFilename
198    * @param toCharset
199    * @param bufferSize
200    *
201    * @return True if OK, else False (will log the reason)
202    */
203   public static boolean transcode(final String srcFilename,
204                                   final String fromCharset,
205                                   final String toFilename,
206                                   final String toCharset,
207                                   final int bufferSize) {
208     boolean success = false;
209     final File from = new File(srcFilename);
210     final File to = new File(toFilename);
211     FileInputStream fileInputStream = null;
212     InputStreamReader reader = null;
213     FileOutputStream fileOutputStream = null;
214     OutputStreamWriter writer = null;
215     try {
216       fileInputStream = new FileInputStream(from);
217       reader = new InputStreamReader(fileInputStream, fromCharset);
218       fileOutputStream = new FileOutputStream(to);
219       writer = new OutputStreamWriter(fileOutputStream, toCharset);
220       final char[] cbuf = new char[bufferSize];
221       int read = reader.read(cbuf);
222       while (read > 0) {
223         writer.write(cbuf, 0, read);
224         read = reader.read(cbuf);
225       }
226       success = true;
227     } catch (final FileNotFoundException e) {
228       logger.warn("File not found: {}", e.getMessage());
229     } catch (final UnsupportedEncodingException e) {
230       logger.warn("Unsupported Encoding: {}", e.getMessage());
231     } catch (final IOException e) {
232       logger.warn("File IOException: {}", e.getMessage());
233     } finally {
234       FileUtils.close(reader);
235       FileUtils.close(fileInputStream);
236       FileUtils.close(writer);
237       FileUtils.close(fileOutputStream);
238     }
239     return success;
240   }
241 
242 }