求文本比较算法
发布网友
发布时间:2023-06-19 14:06
我来回答
共5个回答
热心网友
时间:2024-11-25 01:44
用java比较两个文本文件的不同
RangeDifferencer
public class RangeDifferencer {
private static final RangeDifference[] EMPTY_RESULT= new RangeDifference[0];
/* (non Javadoc)
* Cannot be instantiated!
*/
private RangeDifferencer() {
// nothing to do
}
/**
* Finds the differences between two <code>IRangeComparator</code>s.
* The differences are returned as an array of <code>RangeDifference</code>s.
* If no differences are detected an empty array is returned.
*
* @param left the left range comparator
* @param right the right range comparator
* @return an array of range differences, or an empty array if no differences were found
*/
public static RangeDifference[] findDifferences(IRangeComparator left, IRangeComparator right) {
int rightSize= right.getRangeCount();
int leftSize= left.getRangeCount();
//
// Differences matrix:
// only the last d of each diagonal is stored, i.e., lastDiagonal[k] = row of d
//
int diagLen= 2 * Math.max(rightSize, leftSize); // bound on the size of edit script
int maxDiagonal= diagLen;
int lastDiagonal[]= new int[diagLen + 1]; // the row containing the last d
// on diagonal k (lastDiagonal[k] = row)
int origin= diagLen / 2; // origin of diagonal 0
// script corresponding to d[k]
LinkedRangeDifference script[]= new LinkedRangeDifference[diagLen + 1];
int row, col;
// find common prefix
for (row= 0; row < rightSize && row < leftSize && rangesEqual(right, row, left, row) == true;)
row++;
lastDiagonal[origin]= row;
script[origin]= null;
int lower= (row == rightSize) ? origin + 1 : origin - 1;
int upper= (row == leftSize) ? origin - 1 : origin + 1;
if (lower > upper)
return EMPTY_RESULT;
//System.out.println("findDifferences: " + maxDiagonal + " " + lower + " " + upper);
// for each value of the edit distance
for (int d= 1; d <= maxDiagonal; ++d) { // d is the current edit distance
if (right.skipRangeComparison(d, maxDiagonal, left))
return EMPTY_RESULT; // should be something we already found
// for each relevant diagonal (-d, -d+2 ..., d-2, d)
for (int k= lower; k <= upper; k += 2) { // k is the current diagonal
LinkedRangeDifference edit;
if (k == origin - d || k != origin + d && lastDiagonal[k + 1] >= lastDiagonal[k - 1]) {
//
// move down
//
row= lastDiagonal[k + 1] + 1;
edit= new LinkedRangeDifference(script[k + 1], LinkedRangeDifference.DELETE);
} else {
//
// move right
//
row= lastDiagonal[k - 1];
edit= new LinkedRangeDifference(script[k - 1], LinkedRangeDifference.INSERT);
}
col= row + k - origin;
edit.fRightStart= row;
edit.fLeftStart= col;
//Assert.isTrue(k >= 0 && k <= maxDiagonal);
script[k]= edit;
// slide down the diagonal as far as possible
while (row < rightSize && col < leftSize && rangesEqual(right, row, left, col) == true) {
++row;
++col;
}
//Assert.isTrue(k >= 0 && k <= maxDiagonal); // Unreasonable value for diagonal index
lastDiagonal[k]= row;
if (row == rightSize && col == leftSize) {
//showScript(script[k], right, left);
return createDifferencesRanges(script[k]);
}
if (row == rightSize)
lower= k + 2;
if (col == leftSize)
upper= k - 2;
}
--lower;
++upper;
}
// too many differences
//Assert.isTrue(false);
return null;
}
/**
* Finds the differences among two <code>IRangeComparator</code>s.
* In contrast to <code>findDifferences</code>, the result
* contains <code>RangeDifference</code> elements for non-differing ranges too.
*
* @param left the left range comparator
* @param right the right range comparator
* @return an array of range differences
*/
public static RangeDifference[] findRanges(IRangeComparator left, IRangeComparator right) {
RangeDifference[] in= findDifferences(left, right);
List out= new ArrayList();
RangeDifference rd;
int mstart= 0;
int ystart= 0;
for (int i= 0; i < in.length; i++) {
RangeDifference es= in[i];
rd= new RangeDifference(RangeDifference.NOCHANGE, mstart, es.rightStart() - mstart, ystart, es.leftStart() - ystart);
if (rd.maxLength() != 0)
out.add(rd);
out.add(es);
mstart= es.rightEnd();
ystart= es.leftEnd();
}
rd= new RangeDifference(RangeDifference.NOCHANGE, mstart, right.getRangeCount() - mstart, ystart, left.getRangeCount() - ystart);
if (rd.maxLength() > 0)
out.add(rd);
return (RangeDifference[]) out.toArray(EMPTY_RESULT);
}
//---- private methods
/*
* Creates a Vector of DifferencesRanges out of the LinkedRangeDifference.
* It coalesces adjacent changes.
* In addition, indices are changed such that the ranges are 1) open, i.e,
* the end of the range is not included, and 2) are zero based.
*/
private static RangeDifference[] createDifferencesRanges(LinkedRangeDifference start) {
LinkedRangeDifference ep= reverseDifferences(start);
ArrayList result= new ArrayList();
RangeDifference es= null;
while (ep != null) {
es= new RangeDifference(RangeDifference.CHANGE);
if (ep.isInsert()) {
es.fRightStart= ep.fRightStart + 1;
es.fLeftStart= ep.fLeftStart;
RangeDifference b= ep;
do {
ep= ep.getNext();
es.fLeftLength++;
} while (ep != null && ep.isInsert() && ep.fRightStart == b.fRightStart);
} else {
es.fRightStart= ep.fRightStart;
es.fLeftStart= ep.fLeftStart;
RangeDifference a= ep;
//
// deleted lines
//
do {
a= ep;
ep= ep.getNext();
es.fRightLength++;
} while (ep != null && ep.isDelete() && ep.fRightStart == a.fRightStart + 1);
boolean change= (ep != null && ep.isInsert() && ep.fRightStart == a.fRightStart);
if (change) {
RangeDifference b= ep;
//
// replacement lines
//
do {
ep= ep.getNext();
es.fLeftLength++;
} while (ep != null && ep.isInsert() && ep.fRightStart == b.fRightStart);
} else {
es.fLeftLength= 0;
}
es.fLeftStart++; // meaning of range changes from "insert after", to "replace with"
}
//
// the script commands are 1 based, subtract one to make them zero based
//
es.fRightStart--;
es.fLeftStart--;
result.add(es);
}
return (RangeDifference[]) result.toArray(EMPTY_RESULT);
}
/*
* Tests if two ranges are equal
*/
private static boolean rangesEqual(IRangeComparator a, int ai, IRangeComparator b, int bi) {
return a.rangesEqual(ai, b, bi);
}
/*
* Tests whether <code>right</code> and <code>left</code> changed in the same way
*/
private static boolean rangeSpansEqual(IRangeComparator right, int rightStart, int rightLen, IRangeComparator left, int leftStart, int leftLen) {
if (rightLen == leftLen) {
int i= 0;
for (i= 0; i < rightLen; i++) {
if (!rangesEqual(right, rightStart + i, left, leftStart + i))
break;
}
if (i == rightLen)
return true;
}
return false;
}
/*
* Reverses the range differences
*/
private static LinkedRangeDifference reverseDifferences(LinkedRangeDifference start) {
LinkedRangeDifference ep, behind, ahead;
ahead= start;
ep= null;
while (ahead != null) {
behind= ep;
ep= ahead;
ahead= ahead.getNext();
ep.setNext(behind);
}
return ep;
}
}
下面是一段关于如何使用这些类的简单的测试代码
public class RangeDifferencerTest extends TestCase {
InputStream left = null;
InputStream right = null;
/**
* @see junit.framework.TestCase#setUp()
*/
protected void setUp() throws Exception {
String file1 = "d:/temp/1.txt";
String file2 = "d:/temp/2.txt";
left = new FileInputStream(new File(file1));
right = new FileInputStream(new File(file2));
super.setUp();
}
/**
* @see junit.framework.TestCase#tearDown()
*/
protected void tearDown() throws Exception {
left.close();
right.close();
super.tearDown();
}
public static void main(String[] args) {
}
/*
* Test method for 'com.greatroad.smbnm.compare.RangeDifferencer.findDifferences(IRangeComparator, IRangeComparator)'
*/
public void testFindDifferences() {
try {
RangeDifference[] rds = RangeDifferencer.findRanges(new LineComparator(left,"GBK"),new LineComparator(right,"GBK"));
if(rds != null ){
for(int i=0; i<rds.length; i++){
RangeDifference rd = rds[i];
int length = rd.leftLength();
System.out.println(
"kind = "+rd.kind()
+",left["+rd.leftStart()+"-"+rd.leftEnd()
+"],right["+rd.rightStart()+"-"+rd.rightEnd()+"]");
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
热心网友
时间:2024-11-25 01:44
KMP比较算法
传统比较需要逐个比较字符
KMP比较算法分2次
假设A与B比较
1.在A中找到所有与B的字符串的第一个字符相同的字符位置,做索引
2.在第一步的工作之上从索引的所有字符位置处对B做详细比较,最终得出结果
KMP算法能减少比较次数
热心网友
时间:2024-11-25 01:44
如果都是以行为操作原子的话,可以将文本中的内容以换行符为单位存成一个个的string,然后放在STL的list里。
两个文本存两个list。然后从头到尾比较两个list中的string。这样可以得出哪里多行哪里少行了吧?
再搞两个list把多出的string存起来,比较可以找出那些行存在顺序颠倒的。
烂是烂了点,不过看你做来干什么用了。
热心网友
时间:2024-11-25 01:45
以行为单位,建立几个数组
Line1 ---> A ×× (删除)
Line2 ---> B Line9 (可能是调整了行序)
Line3 ---> C Line1
Line4 ---> D Line2 (平行……)
....
LineX1 ---> YA (新建)
A、B……为搜索结果,1、2为原始行
搜索结果有3种:新建映射、删除映射、平行映射(未更改)、乱序映射(调整行序)
让后根据要求输出。这里认为平行即为未更改,当然只是判断标准之一
热心网友
时间:2024-11-25 01:46
word可以吧
不行就用文本替换