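// NOTE: stray UI label "编辑代码" ("Edit code") left over from a copy-paste; kept as a comment so the file compiles.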

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.math.BigDecimal;
import java.math.RoundingMode;

/**
 * Rewrites text that contains numbers, percent-like symbols and bracketed LaTeX formulas into
 * plain Chinese wording.
 *
 * <p>The pipeline implemented by {@code addSSMLTags} is: fixed word substitutions, percentage
 * division ({@code 5%/2}), large-number formatting (万 / 亿 / 万亿 / 亿亿), percent / per-mille /
 * per-ten-thousand symbols, and finally {@code [...]} / {@code \[...\]} formulas.
 */
public class LatexConverter {

    /** A number immediately followed by a percent, per-mille or per-ten-thousand sign. */
    private static final Pattern PER_UNIT_PATTERN = Pattern.compile("(\\d+(?:\\.\\d+)?)[%‰‱]");

    /**
     * A {@code [...]} or {@code \[...\]} span that contains at least one of {@code \text{..}},
     * {@code \times}, {@code \div}, {@code \approx} or {@code \frac{..}{..}}. Group 1 is the
     * content between the brackets.
     */
    private static final Pattern FORMULA_PATTERN = Pattern.compile(
            "\\\\?\\[(.*?(\\\\text\\{.*?}|\\\\times|\\\\div|\\\\approx|\\\\frac\\{.*?}\\{.*?}).*?)\\\\?]");

    /** Two integers, each optionally followed by {@code %}, separated by a slash. */
    private static final Pattern PERCENT_DIVISION_PATTERN =
            Pattern.compile("(\\d+\\s*%?)\\s*/\\s*(\\d+\\s*%?)");

    /** Optionally signed numbers, with optional thousands separators and decimal part. */
    private static final Pattern NUMBER_PATTERN =
            Pattern.compile("(-?\\d{1,3}(,\\d{3})+(\\.\\d+)?|-?\\d+(\\.\\d+)?|-?\\d+)");

    private static final Pattern TEXT_COMMAND_PATTERN = Pattern.compile("\\\\text\\{([^}]*)}");
    private static final Pattern BRACKET_PATTERN = Pattern.compile("\\[(.*?)\\]");
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
    private static final Pattern BRACES_PATTERN = Pattern.compile("[{}]");

    private static final String FRAC_PREFIX = "\\frac{";

    /** Unit thresholds, largest first; index-aligned with {@link #UNIT_NAMES}. */
    private static final BigDecimal[] UNIT_VALUES = {
        new BigDecimal(10000000000000000L),
        new BigDecimal(1000000000000L),
        new BigDecimal(100000000L),
        new BigDecimal(10000L)
    };

    private static final String[] UNIT_NAMES = {"亿亿", "万亿", "亿", "万"};

    /**
     * Literal substitutions applied one after another, in this order, by
     * {@code processRemainingSymbols}. The order is significant because later entries operate on
     * the output of earlier ones (e.g. {@code \left(} must be handled before a bare {@code (}).
     */
    private static final String[][] SYMBOL_REPLACEMENTS = {
        {"\\times", "乘以"},
        {"\\div", "除以"},
        {"\\cdot", "点"},
        {"\\sqrt", "平方根"},
        {"\\sum", "求和"},
        {"\\prod", "求积"},
        {"\\int", "积分"},
        {"\\infty", "无穷"},
        {"\\partial", "偏导"},
        {"\\nabla", "梯度"},
        {"\\in", "属于"},
        {"\\notin", "不属于"},
        {"\\subset", "子集"},
        {"\\supset", "超集"},
        {"\\cup", "并集"},
        {"\\cap", "交集"},
        {"\\approx", "约等于"},
        {"\\sim", "相似"},
        {"\\cong", "全等"},
        {"\\perp", "垂直"},
        {"\\parallel", "平行"},
        {"\\degree", "度"},
        {"\\pm", "正负"},
        {"\\geq", "大于等于"},
        {"\\leq", "小于等于"},
        {"\\neq", "不等于"},
        {"\\pi", "派"},
        {"\\left(", "括号"},
        {"\\right)", "括回"},
        {"\\left[", "中括号"},
        {"\\right]", "中括号括回"},
        {"(", "括号"},
        {")", "括回"},
        {"[", "中括号"},
        {"]", "中括号括回"},
        {"+", "加"},
        {"-", "减"},
        {"±", "正负"},
        {"/", "除以"},
        {"=", "等于"}
    };

    /**
     * Runs the converter over a fixed list of sample inputs and prints each original string
     * followed by its converted form.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        LatexConverter converter = new LatexConverter();

        // Sample inputs
        String[] testCases = {
            "5‰表示千分之五,2.35‰是百分之零点二三五,1223‱可以表示百分之零点一二二三",
            "上证综指下跌3.56%,深证成指下跌4.45%,",
            "[要计算余额宝存21000年有",
            "10,000亿元",
            "10,2.怎么样",
            "-12,345,678,000,231.231,213,214,132亿元",
            "这是一个简单的等式:[\\text{面积} = \\pi r^2]",
            "这是一个简单的等式:[\\text{年收益率})^\\text{年数}]",
            "复杂公式:[\\text{函数} f(x) = \\sum_{i=1}^{n} a_i x^i]",
            "多个公式:[\\text{力} = m \\times a] 和 [\\text{能量} = mc^2]",
            "包含希腊字母:[\\text{角度} = \\theta]",
            "集合操作:[\\text{A} \\cup \\text{B} \\subset \\text{C}]",
            "带有孖展的文本:这是一个孖展交易",
            "普通方括号:这是[普通文本],不应被转换",
            "综合例子:[\\text{方程组} \\begin{cases} 3x + 5y &= 7 \\\\ 2x - 3y &= 1 \\end{cases}]",
            "[ 20000 \\times 1.37\\div 100 \\times 1 =274]一年后你能赚274元。",
            "[\\text{每月还款额} = \\frac{1000000 \\times \\frac{4.6}{100}}{12} \\times \\frac{(1 + \\frac{4.6}{100} \\div12)^{30 \\times 12}}{(1 + \\frac{4.6}{100} \\div 12)^{30 \\times 12} - 1}]",
            "\\[\\text{利息} = \\text{本金} \\times \\text{年利率(百分数)} \\times \\text{存期}\\]其中本金为20000元,",
            "\\[ \\frac{\\text{贷款本金} \\times \\text{月利率} \\times (1 + \\text{月利率})^{\\text{还款月数}}}{(1 + \\text{月利率})^{\\text{还款月数}} - 1} \\]其中本金为20000元,",
            "\\[ \\text{每月还款额} = \\frac{\\text{贷款本金} \\times \\text{月利率} \\times (1 + \\text{月利率})^{\\text{还款月数}}}{(1 + \\text{月利率})^{\\text{还款月数}} - 1} \\]其中,",
            "\\[ 1000000 \\times 4\\div 100 \\div 12 \\times  (1 +4\\div 100 \\div 12) \\times \\times  (30 \\times 12) \\div  ((1 +4\\div 100 \\div 12) \\times \\times  (30 \\times 12) -1) \\approx 4774.15\\]",
            "\\[ 1000000 \\times 3.5\\div 100 \\div 12 \\times  (1 +3.5\\div 100 \\div 12) \\times \\times  (30 \\times 12) \\div  ((1 +3.5\\div 100 \\div 12) \\times \\times  (30 \\times 12) -1) \\approx 4490.45\\]"

        };

        for (String testCase : testCases) {
            System.out.println("原文: " + testCase);
            System.out.println("转换后: " + converter.addSSMLTags(testCase));
            System.out.println();
            System.out.println();
        }
    }

    /**
     * Applies the full conversion pipeline to {@code text}.
     *
     * @param text the raw input; must not be {@code null}
     * @return the text with substitutions, number formatting, per-unit symbols and formulas
     *     rewritten
     */
    private String addSSMLTags(String text) {
        // Fixed word substitutions applied before any other processing.
        text = text.replace("孖展", "妈展").replace("还款", "环款");
        text = replacePercentageDivision(text);
        text = formatNumbersInText(text);
        text = spellOutPerUnitSymbols(text);
        return verbalizeFormulas(text);
    }

    /** Rewrites {@code N%}, {@code N‰} and {@code N‱} as 百分之N, 千分之N and 万分之N. */
    private static String spellOutPerUnitSymbols(String text) {
        Matcher matcher = PER_UNIT_PATTERN.matcher(text);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String matched = matcher.group();
            String prefix;
            if (matched.endsWith("‰")) {
                prefix = "千分之";
            } else if (matched.endsWith("‱")) {
                prefix = "万分之";
            } else {
                prefix = "百分之";
            }
            matcher.appendReplacement(out, Matcher.quoteReplacement(prefix + matcher.group(1)));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Replaces every span matched by {@link #FORMULA_PATTERN} with its readable form. A comma is
     * inserted before (after) the formula when the neighbouring character exists and is not
     * whitespace.
     */
    private String verbalizeFormulas(String text) {
        Matcher matcher = FORMULA_PATTERN.matcher(text);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            StringBuilder spoken = new StringBuilder();
            // "> ' '" mirrors String.trim(): characters up to U+0020 count as whitespace.
            if (matcher.start() > 0 && text.charAt(matcher.start() - 1) > ' ') {
                spoken.append(",");
            }
            spoken.append(convertLatexToReadableText(matcher.group(1)));
            if (matcher.end() < text.length() && text.charAt(matcher.end()) > ' ') {
                spoken.append(",");
            }
            matcher.appendReplacement(out, Matcher.quoteReplacement(spoken.toString()));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Rewrites {@code a / b} as {@code a除以b} when at least one operand carries a {@code %} sign;
     * divisions without a percent sign are left untouched.
     *
     * @param text the input text; must not be {@code null}
     * @return the text with percentage divisions spelled out
     */
    public String replacePercentageDivision(String text) {
        Matcher matcher = PERCENT_DIVISION_PATTERN.matcher(text);
        StringBuffer result = new StringBuffer();

        while (matcher.find()) {
            String left = matcher.group(1).trim();
            String right = matcher.group(2).trim();
            boolean involvesPercent = left.contains("%") || right.contains("%");
            String replacement = involvesPercent ? left + "除以" + right : matcher.group(0);
            matcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(result);

        return result.toString();
    }

    /**
     * Finds every number in {@code text} (optional leading minus, optional thousands separators,
     * optional decimal part) and replaces it with the result of {@link #formatNumber(Object)}.
     *
     * @param text the input text; must not be {@code null}
     * @return the text with each number reformatted
     */
    public static String formatNumbersInText(String text) {
        Matcher matcher = NUMBER_PATTERN.matcher(text);
        StringBuffer result = new StringBuffer();
        while (matcher.find()) {
            // Thousands separators are dropped before parsing.
            String digits = matcher.group(1).replace(",", "");
            matcher.appendReplacement(result, Matcher.quoteReplacement(formatNumber(digits)));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Formats a number using the largest Chinese unit (亿亿, 万亿, 亿, 万) whose value does not
     * exceed the number's absolute value. The quotient is rounded half-up to five decimal places
     * and trailing zeros are removed. Values whose absolute value is below 10000 are returned via
     * {@link BigDecimal#toPlainString()} unchanged.
     *
     * @param number any object whose {@code toString()} is a valid {@link BigDecimal} literal
     * @return the formatted number
     * @throws NumberFormatException if {@code number.toString()} is not a valid decimal
     */
    public static String formatNumber(Object number) {
        BigDecimal value = new BigDecimal(number.toString());
        BigDecimal magnitude = value.abs();
        for (int i = 0; i < UNIT_VALUES.length; i++) {
            if (magnitude.compareTo(UNIT_VALUES[i]) >= 0) {
                return value.divide(UNIT_VALUES[i], 5, RoundingMode.HALF_UP)
                        .stripTrailingZeros()
                        .toPlainString() + UNIT_NAMES[i];
            }
        }
        return value.toPlainString();
    }

    /**
     * Turns the content of one formula into readable text: unwraps {@code \text{...}}, processes
     * any {@code [...]} sub-expressions, converts the remaining LaTeX constructs and collapses
     * runs of whitespace.
     */
    private String convertLatexToReadableText(String latex) {
        String unwrapped = TEXT_COMMAND_PATTERN.matcher(latex).replaceAll("$1");

        Matcher bracketMatcher = BRACKET_PATTERN.matcher(unwrapped);
        StringBuffer withoutBrackets = new StringBuffer();
        while (bracketMatcher.find()) {
            String inner = processInnerExpression(bracketMatcher.group(1));
            bracketMatcher.appendReplacement(withoutBrackets, Matcher.quoteReplacement(inner));
        }
        bracketMatcher.appendTail(withoutBrackets);

        String spoken = processRemainingSymbols(withoutBrackets.toString());
        return WHITESPACE_PATTERN.matcher(spoken).replaceAll(" ").trim();
    }

    /**
     * Converts a bracketed sub-expression. When it contains exactly one {@code =} separating two
     * parts, the left side is kept verbatim and only the right side is converted.
     */
    private String processInnerExpression(String expression) {
        String[] sides = expression.split("=");
        if (sides.length != 2) {
            return processRemainingSymbols(expression);
        }
        return sides[0].trim() + "等于" + processRemainingSymbols(sides[1].trim());
    }

    /**
     * Converts fractions, powers and the symbols listed in {@link #SYMBOL_REPLACEMENTS}, in that
     * order.
     */
    private String processRemainingSymbols(String latex) {
        String converted = convertPowers(convertFractions(latex));
        for (String[] pair : SYMBOL_REPLACEMENTS) {
            converted = converted.replace(pair[0], pair[1]);
        }
        return converted;
    }

    /**
     * Rewrites every well-formed {@code \frac{n}{d}} as {@code d 分之 n}, including fractions nested
     * inside a numerator or denominator. A {@code \frac} whose braces are unbalanced, or whose
     * numerator is not directly followed by {@code {}, is left untouched.
     */
    private static String convertFractions(String latex) {
        StringBuilder result = new StringBuilder(latex);
        int start = 0;
        while ((start = result.indexOf(FRAC_PREFIX, start)) != -1) {
            int numeratorOpen = start + FRAC_PREFIX.length() - 1;
            int numeratorClose = findMatchingBrace(result, numeratorOpen);
            int denominatorOpen = numeratorClose + 1;
            int denominatorClose = -1;
            if (numeratorClose != -1
                    && denominatorOpen < result.length()
                    && result.charAt(denominatorOpen) == '{') {
                denominatorClose = findMatchingBrace(result, denominatorOpen);
            }

            if (denominatorClose == -1) {
                // Malformed: skip only the "\frac{" prefix so fractions nested inside it are
                // still visited.
                start += FRAC_PREFIX.length();
                continue;
            }

            String numerator = result.substring(numeratorOpen + 1, numeratorClose);
            String denominator = result.substring(denominatorOpen + 1, denominatorClose);
            result.replace(start, denominatorClose + 1, denominator + " 分之 " + numerator);
            // 'start' is deliberately not advanced: the replacement text is rescanned so that
            // fractions nested in the numerator or denominator are converted as well.
        }
        return result.toString();
    }

    /**
     * Rewrites {@code base^exp} as {@code base的exp次方}. The exponent is either a brace group or one
     * character followed by any run of letters, digits and backslashes. A caret that is the last
     * character has no exponent and is left as is; a caret that is the first character is
     * converted with an empty base.
     */
    private static String convertPowers(String latex) {
        StringBuilder result = new StringBuilder(latex);
        int caret = 0;
        while ((caret = result.indexOf("^", caret)) != -1) {
            int exponentStart = caret + 1;
            if (exponentStart >= result.length()) {
                break; // trailing caret: nothing to convert and nothing left to scan
            }

            int exponentEnd;
            if (result.charAt(exponentStart) == '{') {
                int close = findMatchingBrace(result, exponentStart);
                // An unbalanced group extends to the end of the text.
                exponentEnd = close == -1 ? result.length() : close + 1;
            } else {
                exponentEnd = exponentStart + 1;
                while (exponentEnd < result.length() && isExponentChar(result.charAt(exponentEnd))) {
                    exponentEnd++;
                }
            }

            int baseStart = findBaseStart(result, caret);
            String base = result.substring(baseStart, caret).trim();
            String exponent = BRACES_PATTERN
                    .matcher(result.substring(exponentStart, exponentEnd))
                    .replaceAll("")
                    .trim();
            String replacement = base + "的" + exponent + "次方";
            result.replace(baseStart, exponentEnd, replacement);
            caret = baseStart + replacement.length();
        }
        return result.toString();
    }

    /** Returns whether {@code c} may continue an exponent that is not wrapped in braces. */
    private static boolean isExponentChar(char c) {
        return Character.isLetterOrDigit(c) || c == '\\';
    }

    /**
     * Scans left from the caret to find where the text to be replaced together with the exponent
     * begins. Letters, digits, {@code \}, {@code }} and anything inside parentheses are skipped;
     * the scan stops at the first other character or at index 0.
     */
    private static int findBaseStart(CharSequence text, int caret) {
        if (caret == 0) {
            return 0; // nothing precedes the caret
        }
        int index = caret - 1;
        int openParens = 0;
        while (true) {
            char c = text.charAt(index);
            if (c == ')') {
                openParens++;
            }
            if (c == '(') {
                openParens--;
            }
            boolean delimiter = openParens == 0
                    && !Character.isLetterOrDigit(c)
                    && c != '\\'
                    && c != '}';
            if (delimiter || index == 0) {
                break;
            }
            index--;
        }
        // Step past the character the scan stopped on, unless it is the one right before '^'.
        return index != caret - 1 ? index + 1 : index;
    }

    /**
     * Returns the index of the {@code }} that closes the {@code {} at {@code openIndex}, or -1
     * when the group is not closed.
     */
    private static int findMatchingBrace(CharSequence text, int openIndex) {
        int depth = 0;
        for (int i = openIndex; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '{') {
                depth++;
            } else if (c == '}') {
                depth--;
                if (depth == 0) {
                    return i;
                }
            }
        }
        return -1;
    }
}