// ChineseToFirstLetterUtil.java 8.86 KB
package com.lhcredit.common.utils;
import com.alibaba.fastjson.JSON;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * Utility for converting Chinese text to pinyin and for extracting the
 * upper-case first letter of each character's pinyin.
 *
 * <p>Polyphonic characters (characters with several readings) are resolved with
 * a phrase dictionary loaded once from the classpath resource
 * {@code /pinyin/duoyinzi_dict.txt}. Each dictionary line has the form
 * {@code pinyin#phrase phrase ...}; a reading is chosen when one of its phrases
 * appears around the character in the input text.
 */
public class ChineseToFirstLetterUtil {

    /** Classpath location of the polyphone dictionary. */
    private static final String DICT_RESOURCE = "/pinyin/duoyinzi_dict.txt";

    /** First code point (U+4E00) of the range treated as Chinese. */
    private static final int CJK_FIRST = 0x4E00;

    /** Last code point (U+9FA5) of the range treated as Chinese. */
    private static final int CJK_LAST = 0x9FA5;

    /**
     * Context windows tried, in priority order, around a polyphonic character.
     * Each entry is {characters before, characters after}: two after, one after,
     * two before, one before, then one on each side.
     */
    private static final int[][] CONTEXT_WINDOWS = {
        {0, 2}, {0, 1}, {2, 0}, {1, 0}, {1, 1}
    };

    /** Maps a toneless pinyin reading to the phrases in which that reading is used. */
    private static final Map<String, List<String>> pinyinMap = new HashMap<String, List<String>>();

    static {
        initPinyin();
    }

    /**
     * Loads the polyphone phrase dictionary into the in-memory map.
     *
     * <p>Loading is best-effort: a missing resource or an I/O error is reported on
     * standard error and leaves the map with whatever was loaded so far, so
     * conversion keeps working with the first reading of each polyphonic character.
     * Lines without a {@code #} separator, or with nothing after it, are skipped.
     */
    public static void initPinyin() {
        InputStream in = PinyinHelper.class.getResourceAsStream(DICT_RESOURCE);
        if (in == null) {
            // Previously this surfaced as a NullPointerException inside the static
            // initializer, which made the whole class unusable.
            System.err.println("Polyphone dictionary not found on classpath: " + DICT_RESOURCE);
            return;
        }
        BufferedReader reader = null;
        try {
            // NOTE(review): decodes the dictionary as UTF-8 instead of the platform
            // default charset; assumes the resource is stored as UTF-8 - confirm.
            reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split("#");
                if (parts.length < 2) {
                    continue; // blank or malformed line: nothing to register
                }
                pinyinMap.put(parts[0], Arrays.asList(parts[1].split(" ")));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                } else {
                    in.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Returns the upper-case first letter of the pinyin of every character in
     * {@code c}. Characters outside U+4E00..U+9FA5 contribute themselves,
     * upper-cased.
     *
     * <p>Polyphonic characters are resolved using the text surrounding the
     * character's actual position in {@code c}.
     *
     * @param c the text to abbreviate; may be {@code null}
     * @return the concatenated initials, or an empty string when {@code c} is
     *         {@code null} or empty
     */
    public static String ChineseToFirstLetter(String c) {
        if (c == null || c.length() == 0) {
            return "";
        }
        HanyuPinyinOutputFormat format = newOutputFormat();
        StringBuilder initials = new StringBuilder(c.length());
        for (int k = 0; k < c.length(); k++) {
            String spelling = pinyinOf(c.charAt(k), c, k, format);
            if (spelling.length() == 0) {
                continue; // no spelling available: skip rather than fail on charAt(0)
            }
            // Upper-case the whole spelling first (as before) so multi-character
            // case mappings keep yielding the same leading letter.
            initials.append(spelling.toUpperCase(Locale.ROOT).charAt(0));
        }
        return initials.toString();
    }

    /**
     * Converts {@code chines} to toneless lower-case pinyin, with "u:" written as "v".
     *
     * <p>Position {@code i} of {@code chines} is looked up at position {@code i} of
     * {@code chinese} when resolving polyphonic characters, so the two arguments
     * are expected to be aligned (typically the same string).
     *
     * @param chines  the text to convert; {@code null} yields an empty string
     * @param chinese the text used as phrase context; when {@code null},
     *                {@code chines} is used
     * @return the pinyin of all characters concatenated; characters outside
     *         U+4E00..U+9FA5, and characters pinyin4j has no reading for, are
     *         copied through unchanged
     */
    public static String converterToFirstSpell(String chines, String chinese) {
        if (chines == null) {
            return "";
        }
        String context = chinese != null ? chinese : chines;
        HanyuPinyinOutputFormat format = newOutputFormat();
        StringBuilder pinyin = new StringBuilder();
        for (int i = 0; i < chines.length(); i++) {
            pinyin.append(pinyinOf(chines.charAt(i), context, i, format));
        }
        return pinyin.toString();
    }

    /** Builds the output format used throughout: lower case, no tone marks. */
    private static HanyuPinyinOutputFormat newOutputFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        return format;
    }

    /**
     * Returns the pinyin of {@code ch}, which sits at {@code index} in {@code context}.
     * Returns an empty string only when pinyin4j rejects the output format.
     */
    private static String pinyinOf(char ch, String context, int index,
                                   HanyuPinyinOutputFormat format) {
        if (ch < CJK_FIRST || ch > CJK_LAST) {
            return String.valueOf(ch);
        }
        String[] readings;
        try {
            readings = PinyinHelper.toHanyuPinyinStringArray(ch, format);
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            e.printStackTrace();
            return "";
        }
        if (readings == null || readings.length == 0) {
            // No reading available: keep the character instead of discarding the
            // whole result, consistent with how non-Chinese characters are handled.
            return String.valueOf(ch);
        }
        // A single reading, or a first reading repeated in second place, is treated
        // as unambiguous.
        if (readings.length == 1 || readings[0].equals(readings[1])) {
            return normalizeUmlaut(readings[0]);
        }

        String[] candidates = new String[readings.length];
        for (int r = 0; r < readings.length; r++) {
            candidates[r] = normalizeUmlaut(readings[r]);
        }

        // 1) Prefer a reading whose phrase list contains text surrounding the character.
        for (int r = 0; r < candidates.length; r++) {
            List<String> phrases = pinyinMap.get(candidates[r]);
            if (phrases != null && matchesContext(phrases, context, index)) {
                return candidates[r];
            }
        }
        // 2) Otherwise use the reading that lists the lone character as its default.
        String lone = String.valueOf(ch);
        for (int r = 0; r < candidates.length; r++) {
            List<String> phrases = pinyinMap.get(candidates[r]);
            if (phrases != null && phrases.contains(lone)) {
                return candidates[r];
            }
        }
        // 3) Fall back to the first reading reported by pinyin4j.
        return candidates[0];
    }

    /** Tells whether any context window around {@code index} is one of {@code phrases}. */
    private static boolean matchesContext(List<String> phrases, String context, int index) {
        int length = context.length();
        for (int w = 0; w < CONTEXT_WINDOWS.length; w++) {
            int start = index - CONTEXT_WINDOWS[w][0];
            int end = index + CONTEXT_WINDOWS[w][1] + 1;
            if (start >= 0 && end <= length
                    && phrases.contains(context.substring(start, end))) {
                return true;
            }
        }
        return false;
    }

    /** Rewrites the "u:" spelling as "v". */
    private static String normalizeUmlaut(String reading) {
        return reading.replace("u:", "v");
    }

    /**
     * Upper-cases the first character of a string and leaves the rest untouched.
     *
     * @param str the input; may be {@code null}
     * @return {@code null} for {@code null} input, otherwise {@code str} with its
     *         first character upper-cased (locale-independent)
     */
    public static String convertInitialToUpperCase(String str) {
        if (str == null) {
            return null;
        }
        if (str.length() == 0) {
            return str;
        }
        return str.substring(0, 1).toUpperCase(Locale.ROOT) + str.substring(1);
    }

    /** Manual smoke test: prints the initials of a sample name to standard error. */
    public static void main(String[] args) {
        // Other handy samples with polyphonic characters: "重庆", "长沙", "武汉".
        System.err.println(ChineseToFirstLetter("陆家嘴信托"));
    }
}