Levenshtein Distance + LCS 算法计算两个字符串的相似度

    xiaoxiao2023-03-16  4

    /// <summary>
    /// Computes the Levenshtein (minimum edit) distance between two strings:
    /// the smallest number of single-character insertions, deletions and
    /// substitutions needed to turn <paramref name="source"/> into
    /// <paramref name="target"/>.
    /// </summary>
    /// <param name="source">First string. Must not be null.</param>
    /// <param name="target">Second string. Must not be null.</param>
    /// <returns>
    /// The edit distance; 0 when the strings are equal, and the length of the
    /// other string when one of them is empty.
    /// </returns>
    public static int LevenshteinDistance(string source, string target)
    {
        int columns = source.Length;
        int rows = target.Length;

        // Turning an empty string into the other one is pure insertion.
        if (columns == 0)
        {
            return rows;
        }

        if (rows == 0)
        {
            return columns;
        }

        // distance[r, c] = edit distance between the first r characters of
        // target and the first c characters of source.
        int[,] distance = new int[rows + 1, columns + 1];

        for (int c = 0; c <= columns; c++)
        {
            distance[0, c] = c;
        }

        for (int r = 1; r <= rows; r++)
        {
            distance[r, 0] = r;
        }

        for (int r = 1; r <= rows; r++)
        {
            for (int c = 1; c <= columns; c++)
            {
                int substitutionCost = source[c - 1] == target[r - 1] ? 0 : 1;

                int substitution = distance[r - 1, c - 1] + substitutionCost;
                int insertion = distance[r, c - 1] + 1;
                int deletion = distance[r - 1, c] + 1;

                distance[r, c] = Math.Min(Math.Min(substitution, insertion), deletion);
            }
        }

        return distance[rows, columns];
    }

    /// <summary>
    /// Returns the length of the longest run of consecutive characters that
    /// appears in both strings.
    /// </summary>
    /// <remarks>
    /// Despite the method name, the run length is reset to 0 on every
    /// mismatch, so this measures the longest common <em>substring</em>
    /// (contiguous), not the longest common subsequence. The behaviour is
    /// kept as-is because <see cref="StringSimilarity"/> scores depend on it.
    /// </remarks>
    /// <param name="source">First string. Must not be null.</param>
    /// <param name="target">Second string. Must not be null.</param>
    /// <returns>
    /// Length of the longest shared contiguous run; 0 when either string is
    /// empty or the strings share no character.
    /// </returns>
    public static int LongestCommonSubsequence(string source, string target)
    {
        if (source.Length == 0 || target.Length == 0)
        {
            return 0;
        }

        // run[i, j] = length of the common run that ends at source[i - 1]
        // and target[j - 1]. Row 0 and column 0 stay 0 as the base case.
        int[,] run = new int[source.Length + 1, target.Length + 1];
        int longest = 0;

        for (int i = 1; i <= source.Length; i++)
        {
            for (int j = 1; j <= target.Length; j++)
            {
                if (source[i - 1] != target[j - 1])
                {
                    // Array cells default to 0, which is exactly the reset value.
                    continue;
                }

                run[i, j] = run[i - 1, j - 1] + 1;
                if (run[i, j] > longest)
                {
                    longest = run[i, j];
                }
            }
        }

        return longest;
    }

    /// <summary>
    /// Scores how similar two strings are; larger values mean more similar.
    /// The score is <c>lcs / (ld + lcs)</c>, where <c>ld</c> is
    /// <see cref="LevenshteinDistance"/> and <c>lcs</c> is
    /// <see cref="LongestCommonSubsequence"/>.
    /// </summary>
    /// <param name="source">First string. Must not be null.</param>
    /// <param name="target">Second string. Must not be null.</param>
    /// <returns>
    /// A value in [0, 1]: 1 for identical strings (including two empty
    /// strings), 0 when the strings share no character.
    /// </returns>
    public static float StringSimilarity(string source, string target)
    {
        int ld = LevenshteinDistance(source, target);
        int lcs = LongestCommonSubsequence(source, target);

        // Both terms are 0 only when both strings are empty. Dividing would
        // produce NaN, so report them as identical like any other equal pair.
        if (ld + lcs == 0)
        {
            return 1f;
        }

        return (float)lcs / (ld + lcs);
    }

    /// <summary>
    /// Gets the similarity of two strings as the fraction of characters of
    /// <paramref name="str"/> that also occur anywhere in
    /// <paramref name="sourceString"/> (suited to Chinese text).
    /// </summary>
    /// <remarks>
    /// The measure is not symmetric: swapping the arguments can change the
    /// result. Character positions and repeat counts are ignored.
    /// </remarks>
    /// <param name="sourceString">The first string. Must not be null.</param>
    /// <param name="str">The second string. Must not be null.</param>
    /// <returns>
    /// A value in [0, 1]. When <paramref name="str"/> is empty the result is
    /// 1 if <paramref name="sourceString"/> is empty as well, otherwise 0.
    /// </returns>
    public static double SimilarityWith(string sourceString, string str)
    {
        // No characters to test: the ratio below would be 0/0 (NaN).
        if (str.Length == 0)
        {
            return sourceString.Length == 0 ? 1.0 : 0.0;
        }

        int hits = 0;
        foreach (char item in str)
        {
            if (sourceString.IndexOf(item) >= 0)
            {
                hits++;
            }
        }

        // hits + misses always equals str.Length.
        return (double)hits / str.Length;
    }
    转载请注明原文地址: https://ju.6miu.com/read-1152675.html
    最新回复(0)