1
2
3
4
5
6
7
8 """
9 Tools for comparing ranked lists.
10 """
11
def _rank_dists(ranks1, ranks2):
    """Yield ``(key, rank1 - rank2)`` for every key present in both rankings.

    Each argument may be a dict mapping key -> rank or any iterable of
    (key, rank) pairs; both are normalised with ``dict()`` first. Keys that
    appear in only one of the two rankings are skipped.
    """
    ranks1 = dict(ranks1)
    ranks2 = dict(ranks2)
    # ``items()`` rather than ``iteritems()``: the latter does not exist on
    # Python 3 dicts, while ``items()`` works on both Python 2 and 3.
    for key, rank1 in ranks1.items():
        if key in ranks2:
            yield key, rank1 - ranks2[key]


def spearman_correlation(ranks1, ranks2):
    """Returns the Spearman correlation coefficient for two rankings, which
    should be dicts or sequences of (key, rank). The coefficient ranges from
    -1.0 (ranks are opposite) to 1.0 (ranks are identical), and is only
    calculated for keys in both rankings (for meaningful results, remove keys
    present in only one list before ranking).

    Returns 0.0 when fewer than two keys are shared, because the
    coefficient is undefined in that case (the denominator would be zero).
    """
    n = 0
    sum_sq_diff = 0
    for _key, diff in _rank_dists(ranks1, ranks2):
        sum_sq_diff += diff * diff
        n += 1

    # n * (n*n - 1) is zero for n == 0 and n == 1; guard explicitly instead
    # of relying on ZeroDivisionError for control flow.
    if n < 2:
        return 0.0
    return 1 - (6 * float(sum_sq_diff) / (n * (n * n - 1)))
42
43
# NOTE(review): the ``def`` line for this function was missing from the
# reviewed listing; the name ``ranks_from_sequence`` is reconstructed from
# the body and docstring -- confirm against the original file.
def ranks_from_sequence(seq):
    """Given a sequence, yields each element with an increasing rank, suitable
    for use as an argument to L{spearman_correlation}.

    The rank is the element's zero-based position in ``seq``; the result is
    a lazy generator of (element, rank) pairs.
    """
    return ((k, i) for i, k in enumerate(seq))
49
50
# NOTE(review): the ``def`` line for this function was missing from the
# reviewed listing; the name ``ranks_from_scores`` and the default value of
# ``rank_gap`` are reconstructed -- confirm against the original file.
def ranks_from_scores(scores, rank_gap=1e-15):
    """Given a sequence of (key, score) tuples, yields each key with an
    increasing rank, tying with previous key's rank if the difference between
    their scores does not exceed rank_gap. Suitable for use as an argument to
    L{spearman_correlation}.

    A key that is not tied receives its zero-based position in ``scores`` as
    its rank; the first key always receives rank 0.
    """
    prev_score = None
    rank = 0
    for i, (key, score) in enumerate(scores):
        try:
            if abs(score - prev_score) > rank_gap:
                rank = i
        except TypeError:
            # prev_score is None for the first item, so the subtraction
            # raises TypeError and the initial rank of 0 is kept. Any other
            # non-subtractable score is likewise treated as a tie.
            pass

        yield key, rank
        prev_score = score
68