Package nltk :: Package metrics :: Module spearman
[hide private]
[frames] | no frames]

Source Code for Module nltk.metrics.spearman

 1  # Natural Language Toolkit: Spearman Rank Correlation 
 2  # 
 3  # Copyright (C) 2001-2011 NLTK Project 
 4  # Author: Joel Nothman <jnothman@student.usyd.edu.au> 
 5  # URL: <http://nltk.org> 
 6  # For license information, see LICENSE.TXT 
 7   
 8  """ 
 9  Tools for comparing ranked lists. 
10  """ 
11   
12 -def _rank_dists(ranks1, ranks2):
13 """Finds the difference between the values in ranks1 and ranks2 for keys 14 present in both dicts. If the arguments are not dicts, they are converted 15 from (key, rank) sequences. 16 """ 17 ranks1 = dict(ranks1) 18 ranks2 = dict(ranks2) 19 for k, v1 in ranks1.iteritems(): 20 try: 21 yield k, v1 - ranks2[k] 22 except KeyError: 23 pass
24 25
def spearman_correlation(ranks1, ranks2):
    """Compute the Spearman correlation coefficient of two rankings.

    Each ranking is a dict or a sequence of (key, rank) pairs.  Only keys
    found in both rankings contribute to the result, so for meaningful
    values keys present in just one list should be removed before ranking.
    For proper rankings the coefficient runs from -1.0 (opposite order) to
    1.0 (identical order).  When fewer than two keys are shared the
    coefficient is undefined and 0.0 is returned.
    """
    squared_diffs = [diff * diff for _, diff in _rank_dists(ranks1, ranks2)]
    count = len(squared_diffs)
    total = sum(squared_diffs)
    try:
        return 1 - (6 * float(total) / (count * (count * count - 1)))
    except ZeroDivisionError:
        # The denominator is zero when zero or one item is ranked in both.
        return 0.0
42 43
def ranks_from_sequence(seq):
    """Pair each element of C{seq} with its position as a rank.

    Returns a generator of (element, rank) tuples, where the rank starts at
    0 and increases by one per element.  The result is suitable for use as
    an argument to L{spearman_correlation}.
    """
    numbered = enumerate(seq)
    return ((element, position) for position, element in numbered)
49 50
def ranks_from_scores(scores, rank_gap=1e-15):
    """Turn a sequence of (key, score) tuples into (key, rank) pairs.

    Keys are yielded in the order given.  A key receives its own position
    in the sequence as its rank when its score differs from the previous
    score by more than C{rank_gap}; otherwise it shares the previous key's
    rank.  The first key always gets rank 0.  The result is suitable for
    use as an argument to L{spearman_correlation}.
    """
    current_rank = 0
    last_score = None
    for position, (key, score) in enumerate(scores):
        try:
            tied = not abs(score - last_score) > rank_gap
        except TypeError:
            # No usable previous score (e.g. the first item, where
            # last_score is still None): keep the current rank.
            tied = True
        if not tied:
            current_rank = position

        yield key, current_rank
        last_score = score
68