[31] | 1 | import java.io.BufferedReader; |
---|
| 2 | import java.io.BufferedWriter; |
---|
| 3 | import java.io.FileNotFoundException; |
---|
| 4 | import java.io.FileReader; |
---|
| 5 | import java.io.FileWriter; |
---|
| 6 | import java.io.IOException; |
---|
| 7 | import java.io.InputStream; |
---|
| 8 | import java.io.InputStreamReader; |
---|
| 9 | import java.util.ArrayList; |
---|
| 10 | import java.util.Arrays; |
---|
| 11 | import java.util.regex.Matcher; |
---|
| 12 | import java.util.regex.Pattern; |
---|
| 13 | import org.apache.commons.httpclient.HttpClient; |
---|
| 14 | import org.apache.commons.httpclient.HttpException; |
---|
| 15 | import org.apache.commons.httpclient.methods.PostMethod; |
---|
| 16 | |
---|
| 17 | |
---|
| 18 | public class DRBLTranslator { |
---|
| 19 | |
---|
| 20 | ArrayList<String> lines; |
---|
| 21 | ArrayList<Boolean> IsTranslated; |
---|
| 22 | ArrayList<Boolean> HasBeginQuote; |
---|
| 23 | ArrayList<Boolean> HasEndQuote; |
---|
| 24 | ArrayList<String> lstrs; |
---|
| 25 | ArrayList<String> rstrs; |
---|
| 26 | ArrayList<String> quotes; |
---|
| 27 | ArrayList<String> tstrs; |
---|
| 28 | ArrayList<String> ends; |
---|
| 29 | String output; |
---|
| 30 | |
---|
| 31 | public DRBLTranslator() { |
---|
| 32 | lines = new ArrayList<String>(); |
---|
| 33 | IsTranslated = new ArrayList<Boolean>(); |
---|
| 34 | HasBeginQuote = new ArrayList<Boolean>(); |
---|
| 35 | HasEndQuote = new ArrayList<Boolean>(); |
---|
| 36 | lstrs = new ArrayList<String>(); |
---|
| 37 | rstrs = new ArrayList<String>(); |
---|
| 38 | quotes = new ArrayList<String>(); |
---|
| 39 | tstrs = new ArrayList<String>(); |
---|
| 40 | ends = new ArrayList<String>(); |
---|
| 41 | output = ""; |
---|
| 42 | } |
---|
| 43 | |
---|
| 44 | public void reset() { |
---|
| 45 | lines.clear(); |
---|
| 46 | IsTranslated.clear(); |
---|
| 47 | HasBeginQuote.clear(); |
---|
| 48 | HasEndQuote.clear(); |
---|
| 49 | lstrs.clear(); |
---|
| 50 | rstrs.clear(); |
---|
| 51 | quotes.clear(); |
---|
| 52 | tstrs.clear(); |
---|
| 53 | ends.clear(); |
---|
| 54 | output = ""; |
---|
| 55 | } |
---|
| 56 | |
---|
| 57 | private static String googleTranslate(String langpair, String text) { |
---|
| 58 | |
---|
| 59 | /*********************************************************************** |
---|
| 60 | * commons-logging commons-codec |
---|
| 61 | */ |
---|
| 62 | HttpClient client = new HttpClient(); |
---|
| 63 | |
---|
| 64 | /*********************************************************** |
---|
| 65 | */ |
---|
| 66 | client.getHostConfiguration().setHost("www.google.com", 80, "http"); |
---|
| 67 | |
---|
| 68 | /*********************************************************** |
---|
| 69 | */ |
---|
| 70 | PostMethod post = new PostMethod("/translate_t"); |
---|
| 71 | // post.addParameter(new NameValuePair("url", url)); |
---|
| 72 | post.addParameter("langpair", langpair); |
---|
| 73 | post.addParameter("text", text); |
---|
| 74 | |
---|
| 75 | String s = null; |
---|
| 76 | try { |
---|
| 77 | /*********************************************************** |
---|
| 78 | */ |
---|
| 79 | client.executeMethod(post); |
---|
| 80 | |
---|
| 81 | /*********************************************************** |
---|
| 82 | */ |
---|
| 83 | InputStream in = post.getResponseBodyAsStream(); |
---|
| 84 | BufferedReader buf = new BufferedReader(new InputStreamReader(in)); |
---|
| 85 | Pattern p = Pattern |
---|
| 86 | .compile(".*id=\"*result_box\"*\\ *dir=\"*ltr\"*\\>"); |
---|
| 87 | Matcher m = null; |
---|
| 88 | |
---|
| 89 | String line; |
---|
| 90 | while ((line = buf.readLine()) != null) { |
---|
| 91 | m = p.matcher(line); |
---|
| 92 | // System.out.println(line); |
---|
| 93 | if (m.find()) { |
---|
| 94 | s = line.substring(m.end()); |
---|
| 95 | line = s; |
---|
| 96 | p = Pattern.compile("\\<\\ */\\ *div\\ *\\>"); |
---|
| 97 | m = p.matcher(line); |
---|
| 98 | if (m.find()) { |
---|
| 99 | s = line.substring(0, m.start()); |
---|
| 100 | } |
---|
| 101 | break; |
---|
| 102 | } |
---|
| 103 | } |
---|
| 104 | |
---|
| 105 | // s = GetTranslation.instostr(in); |
---|
| 106 | } catch (HttpException e) { |
---|
| 107 | e.printStackTrace(); |
---|
| 108 | } catch (IOException e) { |
---|
| 109 | e.printStackTrace(); |
---|
| 110 | } |
---|
| 111 | return s; |
---|
| 112 | } |
---|
| 113 | |
---|
| 114 | public static String instostr(InputStream in) throws IOException { |
---|
| 115 | StringBuffer out = new StringBuffer(); |
---|
| 116 | byte[] b = new byte[4096]; |
---|
| 117 | for (int n; (n = in.read(b)) != -1;) { |
---|
| 118 | out.append(new String(b, 0, n)); |
---|
| 119 | } |
---|
| 120 | return out.toString(); |
---|
| 121 | } |
---|
| 122 | |
---|
| 123 | public void dump() { |
---|
| 124 | for (int i=0; i<lines.size(); i++) { |
---|
| 125 | System.out.println("======== Line "+i+" ========"); |
---|
| 126 | System.out.println(lines.get(i)); |
---|
| 127 | System.out.println(IsTranslated.get(i)); |
---|
| 128 | System.out.println(HasBeginQuote.get(i)); |
---|
| 129 | System.out.println(HasEndQuote.get(i)); |
---|
| 130 | System.out.println(lstrs.get(i)); |
---|
| 131 | System.out.println(rstrs.get(i)); |
---|
| 132 | System.out.println(tstrs.get(i)); |
---|
| 133 | System.out.println(quotes.get(i)); |
---|
| 134 | System.out.println(ends.get(i)); |
---|
| 135 | System.out.println("======== Line "+i+" ========"); |
---|
| 136 | System.out.println(); |
---|
| 137 | } |
---|
| 138 | } |
---|
| 139 | |
---|
| 140 | public String toConfiguration() { |
---|
| 141 | output=""; |
---|
| 142 | for (int i=0; i<lines.size(); i++) { |
---|
| 143 | |
---|
| 144 | if (IsTranslated.get(i)) { |
---|
| 145 | //strlines += lstrs.get(i) + "=" + quotes.get(i) + tstrs.get(i) + quotes.get(i); |
---|
| 146 | output += lstrs.get(i); |
---|
| 147 | if (HasBeginQuote.get(i)) { |
---|
| 148 | output += "=" + quotes.get(i); |
---|
| 149 | } |
---|
| 150 | output += tstrs.get(i); |
---|
| 151 | if (HasEndQuote.get(i)) { |
---|
| 152 | output += quotes.get(i); |
---|
| 153 | } |
---|
| 154 | output += ends.get(i); |
---|
| 155 | output += "\n"; |
---|
| 156 | } else { |
---|
| 157 | output += lines.get(i) + "\n"; |
---|
| 158 | } |
---|
| 159 | } |
---|
| 160 | return output; |
---|
| 161 | } |
---|
| 162 | |
---|
| 163 | public String getConfiguration() { |
---|
| 164 | return output; |
---|
| 165 | } |
---|
| 166 | public void loadFile(String filepath) { |
---|
| 167 | reset(); |
---|
| 168 | BufferedReader reader = null; |
---|
| 169 | try { |
---|
| 170 | reader = new BufferedReader(new FileReader(filepath)); |
---|
| 171 | } catch (FileNotFoundException e) { |
---|
| 172 | e.printStackTrace(); |
---|
| 173 | } catch (IOException e) { |
---|
| 174 | e.printStackTrace(); |
---|
| 175 | } |
---|
| 176 | |
---|
| 177 | int count = 0, index = 0; |
---|
| 178 | String line = null, lstr = null, rstr = null; |
---|
| 179 | Boolean hasEndQuote; |
---|
| 180 | try { |
---|
| 181 | while ((line = reader.readLine()) != null) { |
---|
| 182 | line = line.trim(); |
---|
| 183 | lines.add(line); |
---|
| 184 | if (line.contains("=")) { |
---|
| 185 | count++; |
---|
| 186 | index = line.indexOf("="); |
---|
| 187 | lstr = line.substring(0, index); |
---|
| 188 | rstr = line.substring(index + 2, line.length()-1); |
---|
| 189 | HasBeginQuote.add(true); |
---|
| 190 | if (line.charAt(index+1)!=line.charAt(line.length()-1)) { |
---|
| 191 | |
---|
| 192 | if (line.charAt(index+1)==line.charAt(line.length()-2)) { |
---|
| 193 | ends.add(String.valueOf(line.charAt(line.length()-1))); |
---|
| 194 | rstr = line.substring(index + 2, line.length()-2); |
---|
| 195 | HasEndQuote.add(true); |
---|
| 196 | } else { |
---|
| 197 | ends.add(""); |
---|
| 198 | HasEndQuote.add(false); |
---|
| 199 | } |
---|
| 200 | } else { |
---|
| 201 | HasEndQuote.add(true); |
---|
| 202 | ends.add(""); |
---|
| 203 | } |
---|
| 204 | lstrs.add(lstr); |
---|
| 205 | rstrs.add(rstr); |
---|
| 206 | IsTranslated.add(true); |
---|
| 207 | quotes.add(String.valueOf(line.charAt(index+1))); |
---|
| 208 | } else if (line.startsWith("#") || line.equals("")){ |
---|
| 209 | IsTranslated.add(false); |
---|
| 210 | lstrs.add(""); |
---|
| 211 | rstrs.add(""); |
---|
| 212 | quotes.add(""); |
---|
| 213 | HasBeginQuote.add(false); |
---|
| 214 | HasEndQuote.add(false); |
---|
| 215 | ends.add(""); |
---|
| 216 | } else { |
---|
| 217 | hasEndQuote=false; |
---|
| 218 | IsTranslated.add(true); |
---|
| 219 | lstrs.add(""); |
---|
| 220 | int lidx=0, ridx=0; |
---|
| 221 | char quote=' '; |
---|
| 222 | if (line.startsWith("'") || line.startsWith("\"")) { |
---|
| 223 | HasBeginQuote.add(true); |
---|
| 224 | lidx=1; |
---|
| 225 | } else { |
---|
| 226 | HasBeginQuote.add(false); |
---|
| 227 | lidx=0; |
---|
| 228 | } |
---|
| 229 | if (line.endsWith("'") || line.endsWith("\"")) { |
---|
| 230 | HasEndQuote.add(true); |
---|
| 231 | hasEndQuote=true; |
---|
| 232 | ridx=line.length()-1; |
---|
| 233 | } else if (line.charAt(line.length()-2)=='\'' || line.charAt(line.length()-2)=='\"'){ |
---|
| 234 | HasEndQuote.add(true); |
---|
| 235 | hasEndQuote=true; |
---|
| 236 | ridx=line.length()-2; |
---|
| 237 | } else { |
---|
| 238 | HasEndQuote.add(false); |
---|
| 239 | ridx=line.length(); |
---|
| 240 | } |
---|
| 241 | if (hasEndQuote) { |
---|
| 242 | if (line.endsWith("'") || line.endsWith("\"")) { |
---|
| 243 | quote = line.charAt(line.length()-1); |
---|
| 244 | quotes.add(String.valueOf(quote)); |
---|
| 245 | } else { |
---|
| 246 | quote = line.charAt(line.length()-2); |
---|
| 247 | quotes.add(String.valueOf(quote)); |
---|
| 248 | } |
---|
| 249 | } else { |
---|
| 250 | quote = line.charAt(0); |
---|
| 251 | quotes.add(String.valueOf(quote)); |
---|
| 252 | } |
---|
| 253 | |
---|
| 254 | if (hasEndQuote) { |
---|
| 255 | if (quote!=line.charAt(line.length()-1)) { |
---|
| 256 | ends.add(String.valueOf(line.charAt(line.length()-1))); |
---|
| 257 | } else |
---|
| 258 | ends.add(""); |
---|
| 259 | } else |
---|
| 260 | ends.add(""); |
---|
| 261 | rstrs.add(line.substring(lidx,ridx)); |
---|
| 262 | } |
---|
| 263 | } |
---|
| 264 | reader.close(); |
---|
| 265 | } catch (IOException e) { |
---|
| 266 | e.printStackTrace(); |
---|
| 267 | } |
---|
| 268 | } |
---|
| 269 | |
---|
| 270 | public void translate(String langpair) { |
---|
| 271 | String str = "", tstr=""; |
---|
| 272 | for (int i=0; i<rstrs.size(); i++){ |
---|
| 273 | str += rstrs.get(i) + "\n"; |
---|
| 274 | |
---|
| 275 | if (i==0 && rstrs.size()>1) |
---|
| 276 | tstrs.add(""); |
---|
| 277 | if ((i!=0 || (i==0 && rstrs.size()==1)) && (((i%300)==0) || i==(rstrs.size()-1)) ) { |
---|
| 278 | //System.out.println("i="+i); |
---|
| 279 | //System.out.println("lstr="+lstrs.get(i)); |
---|
| 280 | tstr = DRBLTranslator.googleTranslate(langpair, str).trim() |
---|
| 281 | .replaceAll("\\s*"\\s*", "\"") |
---|
| 282 | .replaceAll("\\s*>\\s*", ">") |
---|
| 283 | .replaceAll("\\s*'\\s*", "'"); |
---|
| 284 | |
---|
| 285 | str = ""; |
---|
| 286 | tstrs.addAll(Arrays.asList(tstr.split("\\s*<br>\\s*"))); |
---|
| 287 | //System.out.println("tstr="+tstrs.get(i)); |
---|
| 288 | try { |
---|
| 289 | Thread.sleep(3000); |
---|
| 290 | } catch (InterruptedException e) { |
---|
| 291 | e.printStackTrace(); |
---|
| 292 | } |
---|
| 293 | } |
---|
| 294 | } |
---|
| 295 | } |
---|
| 296 | |
---|
| 297 | public void toFile(String out) { |
---|
| 298 | |
---|
| 299 | try { |
---|
| 300 | BufferedWriter writer = new BufferedWriter(new FileWriter(out)); |
---|
| 301 | writer.write(getConfiguration()); |
---|
| 302 | writer.flush(); |
---|
| 303 | writer.close(); |
---|
| 304 | } catch (FileNotFoundException e) { |
---|
| 305 | e.printStackTrace(); |
---|
| 306 | } catch (IOException e) { |
---|
| 307 | e.printStackTrace(); |
---|
| 308 | } |
---|
| 309 | |
---|
| 310 | } |
---|
| 311 | |
---|
| 312 | public void srcToFile(String langpair, String in, String out) { |
---|
| 313 | loadFile(in); |
---|
| 314 | translate(langpair); |
---|
| 315 | toConfiguration(); |
---|
| 316 | toFile(out); |
---|
| 317 | } |
---|
| 318 | public static void main(String[] args) throws HttpException, IOException { |
---|
| 319 | |
---|
| 320 | if (null == args || args.length < 3) { |
---|
| 321 | System.out |
---|
[35] | 322 | .println("Usage: java DRBLTranslator (language_pair) (source_file) (target_file)"); |
---|
[31] | 323 | System.out |
---|
[35] | 324 | .println("Example: java DRBLTranslator \"en|zh-TW\" \"en_US\" \"zh_TW\""); |
---|
[31] | 325 | System.exit(1); |
---|
| 326 | } |
---|
| 327 | |
---|
| 328 | DRBLTranslator trans = new DRBLTranslator(); |
---|
| 329 | trans.srcToFile(args[0], args[1], args[2]); |
---|
| 330 | //trans.srcToFile("en|zh-TW", "en_US", "zh_TW"); |
---|
| 331 | System.out.println("ok"); |
---|
| 332 | //System.out.println(trans.toConfiguration()); |
---|
| 333 | //trans.dump(); |
---|
| 334 | |
---|
| 335 | } |
---|
| 336 | } |
---|