1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# NOTE(review): the ``def`` line was missing from the reviewed listing; the
# signature below is reconstructed from the docstring and doctests.  Confirm
# the default value of ``boundary`` against the original source.
def windowdiff(seg1, seg2, k, boundary="1"):
    """
    Compute the windowdiff score for a pair of segmentations.  A segmentation
    is any sequence over a vocabulary of two items (e.g. "0", "1"), where the
    specified boundary value is used to mark the edge of a segment.

    A window is slid over both segmentations in lockstep; at every position
    the number of boundary values inside the window is counted in each
    segmentation, and the absolute differences of those counts are summed.
    The result is the raw sum; it is not normalised by the number of windows.

        >>> s1 = "00000010000000001000000"
        >>> s2 = "00000001000000010000000"
        >>> s3 = "00010000000000000001000"
        >>> windowdiff(s1, s1, 3)
        0
        >>> windowdiff(s1, s2, 3)
        4
        >>> windowdiff(s2, s3, 3)
        16

    @param seg1: a segmentation
    @type seg1: C{string} or C{list}
    @param seg2: a segmentation
    @type seg2: C{string} or C{list}
    @param k: window width (each window spans C{k + 1} consecutive items)
    @type k: C{int}
    @param boundary: boundary value
    @type boundary: C{string} or C{int} or C{bool}
    @rtype: C{int}
    @raise ValueError: if the two segmentations differ in length
    """
    if len(seg1) != len(seg2):
        # Call-style raise is valid on both Python 2 and Python 3; the old
        # ``raise ValueError, "..."`` form is a syntax error on Python 3.
        raise ValueError("Segmentations have unequal length")

    wd = 0
    # Only full-size windows are compared, so the last start index is
    # len(seg1) - k - 1.  If k >= len(seg1) the range is empty and 0 is returned.
    for start in range(len(seg1) - k):
        stop = start + k + 1
        count1 = seg1[start:stop].count(boundary)
        count2 = seg2[start:stop].count(boundary)
        wd += abs(count1 - count2)
    return wd
49
# NOTE(review): the ``def`` line was missing from the reviewed listing; the
# name ``demo`` is reconstructed -- confirm against the original source.
def demo():
    """
    Print three sample segmentations and the windowdiff scores (window
    width 3) for the pairs (s1, s1), (s1, s2) and (s2, s3).
    """
    s1 = "00000010000000001000000"
    s2 = "00000001000000010000000"
    s3 = "00010000000000000001000"

    # Each print receives a single pre-formatted string so the output is the
    # same under the Python 2 print statement and the Python 3 print function.
    print("s1: %s" % s1)
    print("s2: %s" % s2)
    print("s3: %s" % s3)

    # The "%s %s" form keeps the original output, in which the label's own
    # trailing space is followed by print's separator space.
    print("%s %s" % ("windowdiff(s1, s1, 3) = ", windowdiff(s1, s1, 3)))
    print("%s %s" % ("windowdiff(s1, s2, 3) = ", windowdiff(s1, s2, 3)))
    print("%s %s" % ("windowdiff(s2, s3, 3) = ", windowdiff(s2, s3, 3)))
61