alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (CharsetMapping.java)

This example Java source code file (CharsetMapping.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Learn more about this Java project at its project page.

Java - Java tags/keywords

charsetmapping, comparator, corrupted, entry, ioexception, map_composite, map_doublebyte1, map_indexc2b, map_singlebyte, map_supplement, regex, runtimeexception, security, unmappable_decoding, unmappable_encoding, util

The CharsetMapping.java Java example source code

/*
 * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.nio.cs;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.*;
import java.security.*;

/**
 * In-memory byte&lt;-&gt;char mapping tables for a charset, loaded from a binary
 * ".dat" stream by {@link #get(InputStream)}.
 *
 * <p>The stream starts with a 4-byte big-endian payload length followed by a
 * sequence of tagged tables (see the {@code MAP_*} constants). Every table is
 * stored as 16-bit big-endian values.
 *
 * <p>Instances are populated once by {@link #load(InputStream)}; the lookup
 * methods only read the tables afterwards.
 */
public class CharsetMapping {
    /** Returned by the decode methods when a byte sequence has no mapping. */
    public final static char UNMAPPABLE_DECODING = '\uFFFD';
    /** Returned by the encode methods when a character has no mapping. */
    public final static int  UNMAPPABLE_ENCODING = 0xFFFD;

    // Marker stored in c2bIndex for a high byte that has no page in c2b
    // (see encodeChar).
    private final static int NO_C2B_PAGE = 0xffff;

    private final static String CORRUPTED = "Corrupted data file";

    char[] b2cSB;                //singlebyte b->c
    char[] b2cDB1;               //doublebyte b->c /db1
    char[] b2cDB2;               //doublebyte b->c /db2

    int    b2Min, b2Max;         //min/max(start/end) value of 2nd byte
    int    b1MinDB1, b1MaxDB1;   //min/Max(start/end) value of 1st byte/db1
    int    b1MinDB2, b1MaxDB2;   //min/Max(start/end) value of 1st byte/db2
    int    dbSegSize;            //number of 2nd-byte slots per 1st byte

    char[] c2b;                  //c->b pages, 256 entries per page
    char[] c2bIndex;             //(c >> 8) -> offset of its page in c2b, or 0xffff

    // Supplementary: first half holds the search keys, second half the
    // value stored at the same position (see decodeSurrogate/encodeSurrogate).
    char[] b2cSupp;
    char[] c2bSupp;

    // Composite
    Entry[] b2cComp;             //in data-file order, searched by byte sequence
    Entry[] c2bComp;             //same entries sorted by (cp, cp2)

    /**
     * Decodes a single byte.
     *
     * @param b index into the single-byte table; not range-checked here
     * @return the mapped char (the table stores {@link #UNMAPPABLE_DECODING}
     *         for unmapped bytes, see {@code readSINGLEBYTE})
     */
    public char decodeSingle(int b) {
        return b2cSB[b];
    }

    /**
     * Decodes a double-byte sequence using the DB1 table and then the DB2 table.
     *
     * <p>The second-byte range is inclusive of {@code b2Max}: each first-byte
     * row holds {@code b2Max - b2Min + 1} slots and {@code readDB} registers
     * encodings whose second byte equals {@code b2Max}, so decoding must accept
     * that byte too for encode/decode to round-trip.
     *
     * @param b1 first (lead) byte
     * @param b2 second (trail) byte
     * @return the mapped char, or {@link #UNMAPPABLE_DECODING} if the bytes are
     *         outside every loaded table
     */
    public char decodeDouble(int b1, int b2) {
        if (b2 < b2Min || b2 > b2Max) {
            return UNMAPPABLE_DECODING;
        }
        int column = b2 - b2Min;
        // The null checks keep a mapping without DB1/DB2 tables (all ranges
        // default to 0) from dereferencing a table that was never loaded.
        if (b2cDB1 != null && b1 >= b1MinDB1 && b1 <= b1MaxDB1) {
            return b2cDB1[(b1 - b1MinDB1) * dbSegSize + column];
        }
        if (b2cDB2 != null && b1 >= b1MinDB2 && b1 <= b1MaxDB2) {
            return b2cDB2[(b1 - b1MinDB2) * dbSegSize + column];
        }
        return UNMAPPABLE_DECODING;
    }

    // for jis0213 all supplementary characters are in 0x2xxxx range,
    // so only the xxxx part is now stored, should actually store the
    // codepoint value instead.
    /**
     * Decodes a byte sequence that maps to a supplementary character.
     *
     * @param db byte sequence value; only its low 16 bits are used as the key
     * @param cc destination for the surrogate pair, written at index 0 and 1
     * @return {@code cc} on success, or {@code null} if {@code db} is not found
     */
    public char[] decodeSurrogate(int db, char[] cc) {
        int end = b2cSupp.length / 2;
        // keys live in [0, end); the value for key i is stored at end + i
        int i = Arrays.binarySearch(b2cSupp, 0, end, (char)db);
        if (i >= 0) {
            Character.toChars(b2cSupp[end + i] + 0x20000, cc, 0);
            return cc;
        }
        return null;
    }

    /**
     * Decodes a byte sequence that maps to a base char plus a second char.
     *
     * @param comp lookup key; only {@code comp.bs} is compared
     * @param cc   destination; receives {@code cp} at index 0 and {@code cp2}
     *             at index 1
     * @return {@code cc} on success, or {@code null} if no entry matches
     */
    public char[] decodeComposite(Entry comp, char[] cc) {
        // NOTE(review): b2cComp is kept in data-file order and binary-searched
        // by bs, so the file is presumably sorted by byte sequence - confirm.
        int i = findBytes(b2cComp, comp);
        if (i >= 0) {
            cc[0] = (char)b2cComp[i].cp;
            cc[1] = (char)b2cComp[i].cp2;
            return cc;
        }
        return null;
    }

    /**
     * Encodes a BMP char.
     *
     * @param ch the char to encode
     * @return the stored byte sequence value, or {@link #UNMAPPABLE_ENCODING}
     */
    public int encodeChar(char ch) {
        int index = c2bIndex[ch >> 8];
        if (index == NO_C2B_PAGE) {
            return UNMAPPABLE_ENCODING;
        }
        return c2b[index + (ch & 0xff)];
    }

    /**
     * Encodes a surrogate pair. Only code points in [0x20000, 0x30000) are
     * looked up; the low 16 bits of the code point are the search key.
     *
     * @param hi high surrogate
     * @param lo low surrogate
     * @return the stored byte sequence value, or {@link #UNMAPPABLE_ENCODING}
     */
    public int encodeSurrogate(char hi, char lo) {
        int cp = Character.toCodePoint(hi, lo);
        if (cp < 0x20000 || cp >= 0x30000) {
            return UNMAPPABLE_ENCODING;
        }
        int end = c2bSupp.length / 2;
        int i = Arrays.binarySearch(c2bSupp, 0, end, (char)cp);
        if (i >= 0) {
            return c2bSupp[end + i];
        }
        return UNMAPPABLE_ENCODING;
    }

    /**
     * Tells whether {@code comp.cp} is the first code point of some composite
     * entry. Code points outside [0xe6, 0x31f7] are rejected without a search.
     *
     * @param comp lookup key; only {@code comp.cp} is used
     * @return {@code true} if an entry with the same {@code cp} exists
     */
    public boolean isCompositeBase(Entry comp) {
        if (comp.cp <= 0x31f7 && comp.cp >= 0xe6) {
            return (findCP(c2bComp, comp) >= 0);
        }
        return false;
    }

    /**
     * Encodes a composite (two code point) sequence.
     *
     * @param comp lookup key; {@code comp.cp} and {@code comp.cp2} are compared
     * @return the {@code bs} of the matching entry, or
     *         {@link #UNMAPPABLE_ENCODING}
     */
    public int encodeComposite(Entry comp) {
        int i = findComp(c2bComp, comp);
        if (i >= 0) {
            return c2bComp[i].bs;
        }
        return UNMAPPABLE_ENCODING;
    }

    /**
     * Creates a CharsetMapping from the .dat binary stream, running the load
     * inside {@code AccessController.doPrivileged}.
     *
     * @param is stream positioned at the 4-byte length header; closed by the load
     * @return the loaded mapping, or {@code null} if an {@code IOException}
     *         occurred while reading
     * @throws RuntimeException if the data is corrupted
     */
    public static CharsetMapping get(final InputStream is) {
        return AccessController.doPrivileged(new PrivilegedAction<CharsetMapping>() {
            @Override
            public CharsetMapping run() {
                return new CharsetMapping().load(is);
            }
        });
    }

    /** One composite mapping; also used as the key object for lookups. */
    public static class Entry {
        public int bs;   //byte sequence reps
        public int cp;   //Unicode codepoint
        public int cp2;  //CC of composite
    }

    // Three-way int comparison that cannot overflow, unlike "a - b".
    private static int compareInts(int a, int b) {
        return (a < b) ? -1 : ((a == b) ? 0 : 1);
    }

    /** Orders entries by byte sequence. */
    static Comparator<Entry> comparatorBytes =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                return compareInts(m1.bs, m2.bs);
            }
    };

    /** Orders entries by first code point only. */
    static Comparator<Entry> comparatorCP =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                return compareInts(m1.cp, m2.cp);
            }
    };

    /** Orders entries by first code point, then by second code point. */
    static Comparator<Entry> comparatorComp =
        new Comparator<Entry>() {
            @Override
            public int compare(Entry m1, Entry m2) {
                int v = compareInts(m1.cp, m2.cp);
                if (v == 0) {
                    v = compareInts(m1.cp2, m2.cp2);
                }
                return v;
            }
    };

    /** Binary-searches {@code a} for an entry with the same {@code bs} as {@code k}. */
    static int findBytes(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorBytes);
    }

    /** Binary-searches {@code a} for an entry with the same {@code cp} as {@code k}. */
    static int findCP(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorCP);
    }

    /** Binary-searches {@code a} for an entry with the same {@code cp} and {@code cp2}. */
    static int findComp(Entry[] a, Entry k) {
        return Arrays.binarySearch(a, 0, a.length, k, comparatorComp);
    }

    /*****************************************************************************/
    // tags of different charset mapping tables
    private final static int MAP_SINGLEBYTE      = 0x1; // 0..256  : c
    private final static int MAP_DOUBLEBYTE1     = 0x2; // min..max: c
    private final static int MAP_DOUBLEBYTE2     = 0x3; // min..max: c [DB2]
    private final static int MAP_SUPPLEMENT      = 0x5; //           db,c
    private final static int MAP_SUPPLEMENT_C2B  = 0x6; //           c,db
    private final static int MAP_COMPOSITE       = 0x7; //           db,base,cc
    private final static int MAP_INDEXC2B        = 0x8; // index table of c->bb

    /**
     * Reads exactly {@code count} bytes into {@code bb}, looping over short reads.
     *
     * @return {@code false} if the stream ended before {@code count} bytes arrived
     */
    private static final boolean readNBytes(InputStream in, byte[] bb, int count)
        throws IOException
    {
        int pos = 0;
        int remaining = count;
        while (remaining > 0) {
            int n = in.read(bb, pos, remaining);
            if (n < 0) {
                return false;
            }
            remaining -= n;
            pos += n;
        }
        return true;
    }

    // Parse state used only while load() is running: the whole payload and
    // the read cursor into it.
    int off = 0;
    byte[] bb;

    // Reads one 16-bit big-endian value at the cursor and advances it.
    private int readUnsignedShort() {
        int hi = bb[off++] & 0xff;
        int lo = bb[off++] & 0xff;
        return (hi << 8) | lo;
    }

    /** Reads a length-prefixed table of chars at the cursor. */
    private char[] readCharArray() {
        // first 2 bytes are the number of "chars" stored in this table
        int size = readUnsignedShort();
        char[] cc = new char[size];
        for (int i = 0; i < size; i++) {
            cc[i] = (char)readUnsignedShort();
        }
        return cc;
    }

    // Records "c encodes to bytes" in the c2b pages. Chars whose high byte has
    // no page are skipped: encodeChar reports them as unmappable anyway, and
    // c2b is only sized for real pages.
    private void putC2B(char c, int bytes) {
        int page = c2bIndex[c >> 8];
        if (page == NO_C2B_PAGE) {
            return;
        }
        c2b[page + (c & 0xff)] = (char)bytes;
    }

    /** Loads the single-byte b->c table and registers the reverse mappings. */
    void readSINGLEBYTE() {
        char[] map = readCharArray();
        for (int i = 0; i < map.length; i++) {
            char c = map[i];
            if (c != UNMAPPABLE_DECODING) {
                putC2B(c, i);
            }
        }
        b2cSB = map;
    }

    /**
     * Loads the c->b index table and, if not allocated yet, allocates c2b
     * large enough for the highest page offset the index refers to, filled
     * with {@link #UNMAPPABLE_ENCODING}.
     */
    void readINDEXC2B() {
        char[] map = readCharArray();
        if (c2b == null) {
            // 0xffff entries mean "no page" and must not drive the size. (A
            // char never compares equal to -1, so test the marker explicitly.)
            int maxPage = -1;
            for (int i = 0; i < map.length; i++) {
                int page = map[i];
                if (page != NO_C2B_PAGE && page > maxPage) {
                    maxPage = page;
                }
            }
            if (maxPage >= 0) {
                c2b = new char[maxPage + 256];
                Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
            }
        }
        c2bIndex = map;
    }

    /**
     * Loads one double-byte b->c table and registers the reverse mappings.
     *
     * @param b1Min   first-byte value of row 0
     * @param b2Min   second-byte value of column 0
     * @param segSize number of columns per row
     * @return the b->c table, indexed by {@code row * segSize + column}
     */
    char[] readDB(int b1Min, int b2Min, int segSize) {
        char[] map = readCharArray();
        for (int i = 0; i < map.length; i++) {
            char c = map[i];
            if (c != UNMAPPABLE_DECODING) {
                int b1 = i / segSize;
                int b2 = i % segSize;
                int b = (b1 + b1Min) * 256 + (b2 + b2Min);
                putC2B(c, b);
            }
        }
        return map;
    }

    /** Reads the DB1 header (b1 min/max, b2 min/max) followed by its table. */
    void readDOUBLEBYTE1() {
        b1MinDB1 = readUnsignedShort();
        b1MaxDB1 = readUnsignedShort();
        b2Min =    readUnsignedShort();
        b2Max =    readUnsignedShort();
        dbSegSize = b2Max - b2Min + 1;
        b2cDB1 = readDB(b1MinDB1, b2Min, dbSegSize);
    }

    /** Reads the DB2 header (b1 min/max, b2 min/max) followed by its table. */
    void readDOUBLEBYTE2() {
        b1MinDB2 = readUnsignedShort();
        b1MaxDB2 = readUnsignedShort();
        b2Min =    readUnsignedShort();
        b2Max =    readUnsignedShort();
        dbSegSize = b2Max - b2Min + 1;
        b2cDB2 = readDB(b1MinDB2, b2Min, dbSegSize);
    }

    /**
     * Loads the composite table, stored as (bs, cp, cp2) triples. The same
     * Entry objects are shared by b2cComp (file order) and c2bComp (sorted by
     * cp, then cp2).
     */
    void readCOMPOSITE() {
        char[] map = readCharArray();
        int mLen = map.length / 3;
        b2cComp = new Entry[mLen];
        c2bComp = new Entry[mLen];
        for (int i = 0, j = 0; i < mLen; i++) {
            Entry m = new Entry();
            m.bs = map[j++];
            m.cp = map[j++];
            m.cp2 = map[j++];
            b2cComp[i] = m;
            c2bComp[i] = m;
        }
        Arrays.sort(c2bComp, 0, c2bComp.length, comparatorComp);
    }

    /**
     * Reads the whole payload from {@code in}, closes the stream, then parses
     * the tagged tables into this object.
     *
     * @param in stream positioned at the 4-byte big-endian length header
     * @return {@code this}, or {@code null} if an {@code IOException} occurred
     *         (its stack trace is printed)
     * @throws RuntimeException if the header or payload is truncated, the
     *         length is negative, or an unknown table tag is found
     */
    CharsetMapping load(InputStream in) {
        try {
            int len;
            try {
                // The first 4 bytes are the size of the total data followed in
                // this .dat file.
                int h0 = in.read();
                int h1 = in.read();
                int h2 = in.read();
                int h3 = in.read();
                // read() yields -1 at end of stream; masking it with 0xff would
                // silently turn a truncated header into a bogus length.
                if ((h0 | h1 | h2 | h3) < 0) {
                    throw new RuntimeException(CORRUPTED);
                }
                len = (h0 << 24) | (h1 << 16) | (h2 << 8) | h3;
                if (len < 0) {
                    throw new RuntimeException(CORRUPTED);
                }
                bb = new byte[len];
                off = 0;
                // Read in all bytes
                if (!readNBytes(in, bb, len)) {
                    throw new RuntimeException(CORRUPTED);
                }
            } finally {
                // Close on every path, including corrupted input.
                in.close();
            }

            while (off < len) {
                int type = readUnsignedShort();
                switch (type) {
                case MAP_INDEXC2B:
                    readINDEXC2B();
                    break;
                case MAP_SINGLEBYTE:
                    readSINGLEBYTE();
                    break;
                case MAP_DOUBLEBYTE1:
                    readDOUBLEBYTE1();
                    break;
                case MAP_DOUBLEBYTE2:
                    readDOUBLEBYTE2();
                    break;
                case MAP_SUPPLEMENT:
                    b2cSupp = readCharArray();
                    break;
                case MAP_SUPPLEMENT_C2B:
                    c2bSupp = readCharArray();
                    break;
                case MAP_COMPOSITE:
                    readCOMPOSITE();
                    break;
                default:
                    throw new RuntimeException(CORRUPTED);
                }
            }
            bb = null;   // the raw payload is no longer needed once parsed
            return this;
        } catch (IOException x) {
            x.printStackTrace();
            return null;
        }
    }
}

Other Java examples (source code examples)

Here is a short list of links related to this Java CharsetMapping.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.