import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.math.BigDecimal;
import java.math.RoundingMode;
/**
 * Rewrites text that contains numbers, ratio signs (%, ‰, ‱) and bracketed LaTeX
 * formulas into a plain-text form in which the formulas are spelled out in Chinese.
 *
 * <p>The pipeline applied by {@code addSSMLTags} is: fixed word substitutions,
 * percentage-division rewriting, large-number abbreviation, ratio-sign expansion,
 * and finally conversion of every {@code [ ... ]} / {@code \[ ... \]} segment that
 * contains a recognised LaTeX command.
 */
public class LatexConverter {

    /** A decimal number immediately followed by %, ‰ or ‱. */
    private static final Pattern RATIO_SIGN_PATTERN = Pattern.compile("(\\d+(?:\\.\\d+)?)[%‰‱]");

    /** A bracketed segment that contains at least one recognised LaTeX command. */
    private static final Pattern MATH_SEGMENT_PATTERN = Pattern.compile("\\\\?\\[(.*?(\\\\text\\{.*?}|\\\\times|\\\\div|\\\\approx|\\\\frac\\{.*?}\\{.*?}).*?)\\\\?]");

    /** Two integers (each optionally followed by %) separated by a slash. */
    private static final Pattern PERCENT_DIVISION_PATTERN = Pattern.compile("(\\d+\\s*%?)\\s*/\\s*(\\d+\\s*%?)");

    /** A number, with or without thousands separators, optional sign and fraction. */
    private static final Pattern NUMBER_PATTERN = Pattern.compile("(-?\\d{1,3}(,\\d{3})+(\\.\\d+)?|-?\\d+(\\.\\d+)?|-?\\d+)");

    /** A {@code \text{...}} command; group 1 is its content. */
    private static final Pattern TEXT_COMMAND_PATTERN = Pattern.compile("\\\\text\\{([^}]*)}");

    /** A square-bracketed group nested inside a formula. */
    private static final Pattern INNER_BRACKET_PATTERN = Pattern.compile("\\[(.*?)\\]");

    private static final Pattern WHITESPACE_RUN_PATTERN = Pattern.compile("\\s+");

    private static final String FRAC_PREFIX = "\\frac{";

    private static final BigDecimal YI_YI = new BigDecimal(10000000000000000L);
    private static final BigDecimal WAN_YI = new BigDecimal(1000000000000L);
    private static final BigDecimal YI = new BigDecimal(100000000L);
    private static final BigDecimal WAN = new BigDecimal(10000L);

    /**
     * Literal symbol replacements, applied strictly in this order. Order matters:
     * longer commands that share a prefix with a shorter one (for example
     * {@code \int} and {@code \infty} versus {@code \in}) must come first.
     */
    private static final String[][] SYMBOL_TABLE = {
        {"\\times", "乘以"},
        {"\\div", "除以"},
        {"\\cdot", "点"},
        {"\\sqrt", "平方根"},
        {"\\sum", "求和"},
        {"\\prod", "求积"},
        {"\\int", "积分"},
        {"\\infty", "无穷"},
        {"\\partial", "偏导"},
        {"\\nabla", "梯度"},
        {"\\in", "属于"},
        {"\\notin", "不属于"},
        {"\\subset", "子集"},
        {"\\supset", "超集"},
        {"\\cup", "并集"},
        {"\\cap", "交集"},
        {"\\approx", "约等于"},
        {"\\sim", "相似"},
        {"\\cong", "全等"},
        {"\\perp", "垂直"},
        {"\\parallel", "平行"},
        {"\\degree", "度"},
        {"\\pm", "正负"},
        {"\\geq", "大于等于"},
        {"\\leq", "小于等于"},
        {"\\neq", "不等于"},
        {"\\pi", "派"},
        {"\\left(", "括号"},
        {"\\right)", "括回"},
        {"\\left[", "中括号"},
        {"\\right]", "中括号括回"},
        {"(", "括号"},
        {")", "括回"},
        {"[", "中括号"},
        {"]", "中括号括回"},
        {"+", "加"},
        {"-", "减"},
        {"±", "正负"},
        {"/", "除以"},
        {"=", "等于"},
    };

    /**
     * Runs the converter over a fixed list of sample inputs and prints each input
     * together with its converted form.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        LatexConverter converter = new LatexConverter();
        String[] testCases = {
            "5‰表示千分之五,2.35‰是百分之零点二三五,1223‱可以表示百分之零点一二二三",
            "上证综指下跌3.56%,深证成指下跌4.45%,",
            "[要计算余额宝存21000年有",
            "10,000亿元",
            "10,2.怎么样",
            "-12,345,678,000,231.231,213,214,132亿元",
            "这是一个简单的等式:[\\text{面积} = \\pi r^2]",
            "这是一个简单的等式:[\\text{年收益率})^\\text{年数}]",
            "复杂公式:[\\text{函数} f(x) = \\sum_{i=1}^{n} a_i x^i]",
            "多个公式:[\\text{力} = m \\times a] 和 [\\text{能量} = mc^2]",
            "包含希腊字母:[\\text{角度} = \\theta]",
            "集合操作:[\\text{A} \\cup \\text{B} \\subset \\text{C}]",
            "带有孖展的文本:这是一个孖展交易",
            "普通方括号:这是[普通文本],不应被转换",
            "综合例子:[\\text{方程组} \\begin{cases} 3x + 5y &= 7 \\\\ 2x - 3y &= 1 \\end{cases}]",
            "[ 20000 \\times 1.37\\div 100 \\times 1 =274]一年后你能赚274元。",
            "[\\text{每月还款额} = \\frac{1000000 \\times \\frac{4.6}{100}}{12} \\times \\frac{(1 + \\frac{4.6}{100} \\div12)^{30 \\times 12}}{(1 + \\frac{4.6}{100} \\div 12)^{30 \\times 12} - 1}]",
            "\\[\\text{利息} = \\text{本金} \\times \\text{年利率(百分数)} \\times \\text{存期}\\]其中本金为20000元,",
            "\\[ \\frac{\\text{贷款本金} \\times \\text{月利率} \\times (1 + \\text{月利率})^{\\text{还款月数}}}{(1 + \\text{月利率})^{\\text{还款月数}} - 1} \\]其中本金为20000元,",
            "\\[ \\text{每月还款额} = \\frac{\\text{贷款本金} \\times \\text{月利率} \\times (1 + \\text{月利率})^{\\text{还款月数}}}{(1 + \\text{月利率})^{\\text{还款月数}} - 1} \\]其中,",
            "\\[ 1000000 \\times 4\\div 100 \\div 12 \\times (1 +4\\div 100 \\div 12) \\times \\times (30 \\times 12) \\div ((1 +4\\div 100 \\div 12) \\times \\times (30 \\times 12) -1) \\approx 4774.15\\]",
            "\\[ 1000000 \\times 3.5\\div 100 \\div 12 \\times (1 +3.5\\div 100 \\div 12) \\times \\times (30 \\times 12) \\div ((1 +3.5\\div 100 \\div 12) \\times \\times (30 \\times 12) -1) \\approx 4490.45\\]"
        };
        for (String testCase : testCases) {
            System.out.println("原文: " + testCase);
            System.out.println("转换后: " + converter.addSSMLTags(testCase));
            System.out.println();
            System.out.println();
        }
    }

    /**
     * Applies the full conversion pipeline to {@code text}.
     *
     * @param text the raw input; must not be {@code null}
     * @return the text with word substitutions, number abbreviation, ratio-sign
     *         expansion and formula conversion applied
     */
    private String addSSMLTags(String text) {
        // Fixed substitutions; String.replace is a no-op when the word is absent.
        text = text.replace("孖展", "妈展");
        text = text.replace("还款", "环款");

        text = replacePercentageDivision(text);
        text = formatNumbersInText(text);
        text = expandRatioSigns(text);

        Matcher matcher = MATH_SEGMENT_PATTERN.matcher(text);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String readable = convertLatexToReadableText(matcher.group(1));
            // Separate the spoken formula from adjacent non-blank text with commas.
            String leading = hasVisibleCharAt(text, matcher.start() - 1) ? "," : "";
            String trailing = hasVisibleCharAt(text, matcher.end()) ? "," : "";
            matcher.appendReplacement(out, Matcher.quoteReplacement(leading + readable + trailing));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /** Moves the %, ‰ or ‱ sign in front of its number as 百分之 / 千分之 / 万分之. */
    private static String expandRatioSigns(String text) {
        Matcher matcher = RATIO_SIGN_PATTERN.matcher(text);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String number = matcher.group(1);
            char sign = text.charAt(matcher.end() - 1);
            String spoken;
            switch (sign) {
                case '‰':
                    spoken = "千分之" + number;
                    break;
                case '‱':
                    spoken = "万分之" + number;
                    break;
                case '%':
                    spoken = "百分之" + number;
                    break;
                default:
                    spoken = "";
                    break;
            }
            matcher.appendReplacement(out, Matcher.quoteReplacement(spoken));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /** Returns true when {@code index} is inside {@code text} and holds a char above U+0020. */
    private static boolean hasVisibleCharAt(String text, int index) {
        return index >= 0 && index < text.length() && text.charAt(index) > ' ';
    }

    /**
     * Rewrites {@code a/b} as {@code a除以b} when at least one operand carries a
     * percent sign; divisions without a percent sign are left untouched.
     *
     * @param text the input text; must not be {@code null}
     * @return the text with percentage divisions spelled out
     */
    public String replacePercentageDivision(String text) {
        Matcher matcher = PERCENT_DIVISION_PATTERN.matcher(text);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String left = matcher.group(1).trim();
            String right = matcher.group(2).trim();
            boolean involvesPercent = left.contains("%") || right.contains("%");
            String replacement = involvesPercent ? left + "除以" + right : matcher.group(0);
            matcher.appendReplacement(out, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Finds every number in {@code text} (thousands separators allowed), strips the
     * separators and replaces the number with the result of {@link #formatNumber}.
     *
     * @param text the input text; must not be {@code null}
     * @return the text with each number re-formatted
     */
    public static String formatNumbersInText(String text) {
        Matcher matcher = NUMBER_PATTERN.matcher(text);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String digits = matcher.group(1).replace(",", "");
            String formatted = formatNumber(new BigDecimal(digits));
            matcher.appendReplacement(out, Matcher.quoteReplacement(formatted));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Abbreviates a number with the largest fitting unit among 亿亿 (1e16),
     * 万亿 (1e12), 亿 (1e8) and 万 (1e4). The quotient is rounded half-up to five
     * decimals and trailing zeros are dropped. Values whose magnitude is below
     * 10000 are returned as their plain string.
     *
     * @param number any object whose {@code toString()} is a valid
     *               {@link BigDecimal} literal
     * @return the abbreviated representation
     * @throws NumberFormatException if {@code number.toString()} is not a valid number
     */
    public static String formatNumber(Object number) {
        BigDecimal value = new BigDecimal(number.toString());
        BigDecimal magnitude = value.abs();
        if (magnitude.compareTo(YI_YI) >= 0) {
            return scaleBy(value, YI_YI) + "亿亿";
        }
        if (magnitude.compareTo(WAN_YI) >= 0) {
            return scaleBy(value, WAN_YI) + "万亿";
        }
        if (magnitude.compareTo(YI) >= 0) {
            return scaleBy(value, YI) + "亿";
        }
        if (magnitude.compareTo(WAN) >= 0) {
            return scaleBy(value, WAN) + "万";
        }
        return value.toPlainString();
    }

    /** Divides by {@code unit}, rounding half-up to 5 decimals, without trailing zeros. */
    private static String scaleBy(BigDecimal value, BigDecimal unit) {
        return value.divide(unit, 5, RoundingMode.HALF_UP).stripTrailingZeros().toPlainString();
    }

    /**
     * Converts the content of one formula segment into readable text: unwraps
     * {@code \text{}}, converts nested square-bracket groups, converts the remaining
     * LaTeX constructs and collapses runs of whitespace.
     */
    private String convertLatexToReadableText(String latex) {
        latex = TEXT_COMMAND_PATTERN.matcher(latex).replaceAll("$1");

        Matcher bracketMatcher = INNER_BRACKET_PATTERN.matcher(latex);
        StringBuffer out = new StringBuffer();
        while (bracketMatcher.find()) {
            String processedInner = processInnerExpression(bracketMatcher.group(1));
            bracketMatcher.appendReplacement(out, Matcher.quoteReplacement(processedInner));
        }
        bracketMatcher.appendTail(out);

        latex = processRemainingSymbols(out.toString());
        return WHITESPACE_RUN_PATTERN.matcher(latex).replaceAll(" ").trim();
    }

    /**
     * Converts a nested bracket group. When it splits into exactly two parts around
     * {@code =}, only the right-hand side is converted and the parts are joined
     * with 等于; otherwise the whole expression is converted.
     */
    private String processInnerExpression(String expression) {
        String[] parts = expression.split("=");
        if (parts.length == 2) {
            String leftSide = parts[0].trim();
            String rightSide = processRemainingSymbols(parts[1].trim());
            return leftSide + "等于" + rightSide;
        }
        return processRemainingSymbols(expression);
    }

    /**
     * Converts fractions, then powers, then individual symbols/operators.
     */
    private String processRemainingSymbols(String latex) {
        return replaceSymbols(convertPowers(convertFractions(latex)));
    }

    /**
     * Rewrites every well-formed {@code \frac{N}{D}} as {@code D 分之 N}, including
     * fractions nested inside another fraction's numerator or denominator.
     * A {@code \frac{} whose numerator or denominator group is missing or not
     * closed is left in the text unchanged.
     */
    private static String convertFractions(String latex) {
        StringBuilder buf = new StringBuilder(latex);
        int from = 0;
        while ((from = buf.indexOf(FRAC_PREFIX, from)) != -1) {
            int numOpen = from + FRAC_PREFIX.length() - 1;
            int numClose = findMatchingBrace(buf, numOpen);
            int denClose = -1;
            int denOpen = -1;
            if (numClose >= 0) {
                denOpen = numClose + 1;
                // LaTeX allows blanks between the two argument groups.
                while (denOpen < buf.length() && Character.isWhitespace(buf.charAt(denOpen))) {
                    denOpen++;
                }
                if (denOpen < buf.length() && buf.charAt(denOpen) == '{') {
                    denClose = findMatchingBrace(buf, denOpen);
                }
            }
            if (denClose < 0) {
                // Malformed: step over this prefix so the search makes progress.
                from += FRAC_PREFIX.length();
                continue;
            }
            String numerator = buf.substring(numOpen + 1, numClose);
            String denominator = buf.substring(denOpen + 1, denClose);
            buf.replace(from, denClose + 1, denominator + " 分之 " + numerator);
            // 'from' is intentionally not advanced: the inserted text is rescanned so
            // that nested fractions are converted too. Every replacement shortens the
            // buffer, so the loop terminates.
        }
        return buf.toString();
    }

    /**
     * Returns the index of the '}' that closes the '{' at {@code openIndex},
     * or -1 when the group is never closed.
     */
    private static int findMatchingBrace(CharSequence text, int openIndex) {
        int depth = 0;
        for (int i = openIndex; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '{') {
                depth++;
            } else if (c == '}') {
                depth--;
                if (depth == 0) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Rewrites {@code base^exp} and {@code base^{exp}} as {@code base的exp次方}.
     * A caret that is the very last character has no exponent and is left as is.
     */
    private static String convertPowers(String latex) {
        StringBuilder buf = new StringBuilder(latex);
        int caret = 0;
        while ((caret = buf.indexOf("^", caret)) != -1) {
            int expStart = caret + 1;
            if (expStart >= buf.length()) {
                // Dangling caret at the end of the text: nothing to read as exponent.
                break;
            }
            int expEnd = expStart + 1;
            if (buf.charAt(expStart) == '{') {
                int depth = 1;
                while (depth > 0 && expEnd < buf.length()) {
                    char c = buf.charAt(expEnd);
                    if (c == '{') {
                        depth++;
                    } else if (c == '}') {
                        depth--;
                    }
                    expEnd++;
                }
            } else {
                // Unbraced exponent: the first char is always taken, then any
                // following letters, digits or backslashes.
                while (expEnd < buf.length()
                        && (Character.isLetterOrDigit(buf.charAt(expEnd)) || buf.charAt(expEnd) == '\\')) {
                    expEnd++;
                }
            }

            int baseStart = findBaseStart(buf, caret);
            String base = buf.substring(baseStart, caret).trim();
            String exponent = buf.substring(expStart, expEnd).replace("{", "").replace("}", "").trim();
            String replacement = base + "的" + exponent + "次方";
            buf.replace(baseStart, expEnd, replacement);
            // Continue after the inserted text; it is not rescanned.
            caret = baseStart + replacement.length();
        }
        return buf.toString();
    }

    /**
     * Scans left from the caret to find where the base of a power begins. Letters,
     * digits, backslashes, '}' and everything inside balanced parentheses count as
     * part of the base; the character the scan stops on is excluded unless it is
     * the one directly before the caret. The text of the base is copied back
     * verbatim, so the chosen start only decides which surrounding blanks get
     * trimmed.
     */
    private static int findBaseStart(CharSequence text, int caret) {
        if (caret == 0) {
            // A caret at the very start has an empty base.
            return 0;
        }
        int baseStart = caret - 1;
        int openParens = 0;
        while (baseStart >= 0) {
            char c = text.charAt(baseStart);
            if (c == ')') {
                openParens++;
            }
            if (c == '(') {
                openParens--;
            }
            boolean delimiter = openParens == 0 && !Character.isLetterOrDigit(c) && c != '\\' && c != '}';
            if (delimiter || baseStart == 0) {
                break;
            }
            baseStart--;
        }
        if (baseStart != caret - 1) {
            baseStart++;
        }
        return baseStart;
    }

    /** Applies every entry of {@link #SYMBOL_TABLE} in order as a literal replacement. */
    private static String replaceSymbols(String latex) {
        String result = latex;
        for (String[] entry : SYMBOL_TABLE) {
            result = result.replace(entry[0], entry[1]);
        }
        return result;
    }
}