change longest common text

This commit is contained in:
Looly 2021-08-01 20:54:28 +08:00
parent 0026ffff93
commit 74b4b68bb0
3 changed files with 74 additions and 25 deletions

View File

@ -12,6 +12,7 @@
* 【json 】 JSONGetter增加getLocalDateTime方法（pr#387@Gitee）
* 【core 】 增加JNDIUtil（issue#1727@Github）
* 【core 】 SpringUtil增加unregisterBean方法（pr#388@Gitee）
* 【core 】 优化TextSimilarity公共子串算法（issue#I42A6V@Gitee）
### 🐞Bug修复
* 【jwt 】 修复JWTUtil中几个方法非static的问题（issue#1735@Github）

View File

@ -13,7 +13,12 @@ import cn.hutool.core.util.StrUtil;
public class TextSimilarity {
/**
* 计算相似度两个都是空串相似度为1被认为是相同的串
* 计算相似度两个都是空串相似度为1被认为是相同的串<br>
* 比较方法为
* <ul>
* <li>只比较两个字符串字母数字汉字部分其他符号去除</li>
* <li>计算出两个字符串最大子串除以最长的字符串结果即为相似度</li>
* </ul>
*
* @param strA 字符串1
* @param strB 字符串2
@ -36,8 +41,8 @@ public class TextSimilarity {
return 1;
}
int temp2 = longestCommonSubstring(newStrA, newStrB).length();
return NumberUtil.div(temp2, temp);
final int commonLength = longestCommonSubstringLength(newStrA, newStrB);
return NumberUtil.div(commonLength, temp);
}
/**
@ -52,6 +57,40 @@ public class TextSimilarity {
return NumberUtil.formatPercent(similar(strA, strB), scale);
}
/**
 * Recovers a longest common subsequence of two strings by walking backwards through the
 * dynamic-programming table produced by {@code generateMatrix}. Despite the method name,
 * the matched characters are not required to be contiguous in either input.<br>
 * Algorithm walkthrough: https://leetcode-cn.com/problems/longest-common-subsequence/solution/zui-chang-gong-gong-zi-xu-lie-by-leetcod-y7u0/
 *
 * @param strA first string
 * @param strB second string
 * @return the common characters, in the order in which they occur in {@code strA}
 */
public static String longestCommonSubstring(String strA, String strB) {
    // Table layout as described by the original author: cell [0][0] is 0; when the characters
    // at the corresponding positions match, a cell holds its upper-left neighbour plus one,
    // otherwise the larger of its left/upper neighbours. All remaining cells are 0.
    final int[][] table = generateMatrix(strA, strB);
    int row = strA.length();
    int col = strB.length();

    // The bottom-right cell holds the length of the answer; fill the buffer from its end
    // because the walk below discovers the characters last-to-first.
    final char[] common = new char[table[row][col]];
    int writePos = common.length;

    while (table[row][col] != 0) {
        final int current = table[row][col];
        if (current == table[row][col - 1]) {
            // Same value to the left: this column contributed nothing, step left.
            col--;
            continue;
        }
        if (current == table[row - 1][col]) {
            // Same value above: this row contributed nothing, step up.
            row--;
            continue;
        }
        // The value differs from both neighbours, so this cell marks a matched character.
        common[--writePos] = strA.charAt(row - 1);
        row--;
        col--;
    }
    return new String(common);
}
// --------------------------------------------------------------------------------------------------- Private method start
/**
* 将字符串的所有数据依次写成一行去除无意义字符串
@ -94,7 +133,20 @@ public class TextSimilarity {
* @param strB 字符串2
* @return length of the longest common subsequence of the two strings
*/
private static String longestCommonSubstring(String strA, String strB) {
private static int longestCommonSubstringLength(String strA, String strB) {
    final int lengthA = strA.length();
    final int lengthB = strB.length();
    // The cell indexed by both full lengths is the table's final entry, i.e. the result
    // computed over the complete inputs.
    final int[][] table = generateMatrix(strA, strB);
    return table[lengthA][lengthB];
}
/**
* 求公共子串采用动态规划算法 其不要求所求得的字符在所给的字符串中是连续的
*
* @param strA 字符串1
* @param strB 字符串2
* @return 公共串矩阵
*/
private static int[][] generateMatrix(String strA, String strB) {
int m = strA.length();
int n = strB.length();
@ -111,23 +163,7 @@ public class TextSimilarity {
}
}
// 矩阵中如果matrix[m][n]的值不等于matrix[m-1][n]的值也不等于matrix[m][n-1]的值
// 则matrix[m][n]对应的字符为相似字符元并将其存入result数组中
char[] result = new char[matrix[m][n]];
int currentIndex = result.length - 1;
while (matrix[m][n] != 0) {
if (matrix[m][n] == matrix[m][n - 1]) {
n--;
} else if (matrix[m][n] == matrix[m - 1][n]) {
m--;
} else {
result[currentIndex] = strA.charAt(m - 1);
currentIndex--;
n--;
m--;
}
}
return new String(result);
return matrix;
}
// --------------------------------------------------------------------------------------------------- Private method end
}

View File

@ -9,15 +9,27 @@ import org.junit.Test;
*
*/
public class TextSimilarityTest {
/**
 * Verifies the similarity ratio and its percent rendering for two texts that differ only
 * by a two-character prefix.
 */
@Test
public void similarDegreeTest() {
    final String a = "我是一个文本,独一无二的文本";
    final String b = "一个文本,独一无二的文本";

    // With punctuation ignored, 11 of the 13 characters of the longer text are shared,
    // which is the same ratio the "84.62%" assertion below relies on. The previous
    // expectation (0.857...) contradicted that percentage and only passed because the
    // delta of 16 accepts any value a ratio can take.
    final double degree = TextSimilarity.similar(a, b);
    // Tolerance leaves room for rounding in the division helper while still rejecting
    // any ratio that would not render as 84.62%.
    Assert.assertEquals(11D / 13D, degree, 1e-4);

    final String similarPercent = TextSimilarity.similar(a, b, 2);
    Assert.assertEquals("84.62%", similarPercent);
}
/**
 * Verifies that trailing symbol noise on one input does not change the similarity ratio
 * or its percent rendering.
 */
@Test
public void similarDegreeTest2() {
    final String a = "我是一个文本,独一无二的文本";
    final String b = "一个文本,独一无二的文本,#,>>?#$%^%$&^&^%";

    // Symbols are expected to be discarded before comparison, so the ratio is again 11/13,
    // matching the "84.62%" assertion below. The previous expectation (0.857...) disagreed
    // with that percentage and was masked by a delta of 16, which can never fail.
    final double degree = TextSimilarity.similar(a, b);
    // Tolerance leaves room for rounding in the division helper while still rejecting
    // any ratio that would not render as 84.62%.
    Assert.assertEquals(11D / 13D, degree, 1e-4);

    final String similarPercent = TextSimilarity.similar(a, b, 2);
    Assert.assertEquals("84.62%", similarPercent);
}
@ -27,4 +39,4 @@ public class TextSimilarityTest {
final double abd = TextSimilarity.similar("abd", "1111");
Assert.assertEquals(0, abd, 1);
}
}
}