开发者

Positional Rankings and Dealing with Ties in Python

开发者 https://www.devze.com 2022-12-21 04:06 出处:网络
(I apologize that previous versions of this question displayed the wrong function that I need to fix, this has been remedied and I hope the question makes a little more sense now.)

(I apologize that previous versions of this question displayed the wrong function that I need to fix, this has been remedied and I hope the question makes a little more sense now.)

I ha开发者_Python百科ve a list of objects with scores, and I'm attempting to assign rank to them based on those scores. Below is basically how I output my data.

sorted_scores = [
    ('Apolo Ohno', 0),
    ('Shanie Davis', -1),
    ('Bodie Miller', -2),
    ('Lindsay Vohn', -3),  
    ('Shawn White', -3),
    ('Bryan Veloso', -4)
]

I have a tie. Now, the function that assigns positions to the objects above right now is a simple for loop that just assigns the value of i as the object's final position.

positions = {}

i = 1
for key, value in sorted_list:
    # Since in my codebase the strings are IDs, I use the key to fetch the object.
    if value is not None:
        positions[key] = i
        i += 1

So that'll obviously return:

positions = {
    'Apolo Ohno': 1,
    'Shanie Davis': 2,
    'Bodie Miller': 3,
    'Lindsay Vohn': 4,        
    'Shawn White': 5,
    'Bryan Veloso': 6
}

Hopefully that makes some sense. The meat of the question is that loop. What makes more sense is if it returns them like so:

positions = {
    'Apolo Ohno': 1,
    'Shanie Davis': 2,
    'Bodie Miller': 3,
    'Lindsay Vohn': 4, # Same value.
    'Shawn White': 4, # Same value.
    'Bryan Veloso': 6
}

How would I edit the function above to do that, keeping in mind that I could have any number of ties at any given time depending on how many of my members ranked said object? The highest rank should be 1, so it can be displayed as such: <rank>/<total # of people>

Thanks in advance. :)


>>> sorted_scores = [
...     ('Apolo Ohno', 0),
...     ('Shanie Davis', -1),
...     ('Bodie Miller', -2),
...     ('Lindsay Vohn', -3),  
...     ('Shawn White', -3),
...     ('Bryan Veloso',-4)
... ]
>>> 
>>> res = {}
>>> prev = None
>>> for i,(k,v) in enumerate(sorted_scores):
...     if v!=prev:
...         place,prev = i+1,v
...     res[k] = place
... 
>>> print res
{'Apolo Ohno': 1, 'Bryan Veloso': 6, 'Shanie Davis': 2, 'Lindsay Vohn': 4, 'Bodie Miller': 3, 'Shawn White': 4}

Remember that dicts are unordered, so to iterate in order of place, you need to do this

>>> from operator import itemgetter
>>> print sorted(res.items(),key=itemgetter(1))
[('Apolo Ohno', 1), ('Shanie Davis', 2), ('Bodie Miller', 3), ('Lindsay Vohn', 4), ('Shawn White', 4), ('Bryan Veloso', 6)]


=== Update after change/clarification of specs ===

# coding: ascii

def ranks_from_scores(sorted_scores):
    """sorted_scores: a list of tuples (object_id, score), sorted by score DESCENDING
       return a mapping of object IDs to ranks
    """
    ranks = {}
    previous_score = object()
    for index, (obj_id, score) in enumerate(sorted_scores):
        if score != previous_score:
            previous_score = score
            rank = index + 1
        ranks[obj_id] = rank
    return ranks

from operator import itemgetter
import pprint

scores0 = dict([
    ('Apolo Ohno', 0),
    ('Shanie Davis', -1),
    ('Bodie Miller', -2),
    ('Lindsay Vohn', -3),
    ('Shawn White', -3)
    ])

scores1 = {
    'lorem': 100,
    'ipsum': 200,
    'dolor': 300,
    'sit': 300,
    'amet': 300,
    'quia': 400,
    'consectetur': 500,
    'adipiscing': 500,
    'elit': 600,
    }

scores2 = {
    'lorem': 100,
    'ipsum': 200,
    'dolor': 300,
    'sit': 300,
    'amet': 300,
    'quia': 400,
    'consectetur': 500,
    'adipiscing': 500,
    'elit': 6000,
    }

import pprint
funcs = (ranks_from_scores, ) # Watch this space!
tests = (scores0, scores1, scores2)

for test in tests:
    print
    test_list = sorted(test.items(), key=itemgetter(1), reverse=True)
    print "Input:", test_list
    for func in funcs:
        result = func(test_list)
        print "%s ->" % func.__name__
        pprint.pprint(result)

Results:

Input: [('Apolo Ohno', 0), ('Shanie Davis', -1), ('Bodie Miller', -2), ('Lindsay
 Vohn', -3), ('Shawn White', -3)]
ranks_from_scores ->
{'Apolo Ohno': 1,
 'Bodie Miller': 3,
 'Lindsay Vohn': 4,
 'Shanie Davis': 2,
 'Shawn White': 4}

Input: [('elit', 600), ('consectetur', 500), ('adipiscing', 500), ('quia', 400),
 ('dolor', 300), ('sit', 300), ('amet', 300), ('ipsum', 200), ('lorem', 100)]
ranks_from_scores ->
{'adipiscing': 2,
 'amet': 5,
 'consectetur': 2,
 'dolor': 5,
 'elit': 1,
 'ipsum': 8,
 'lorem': 9,
 'quia': 4,
 'sit': 5}

Input: [('elit', 6000), ('consectetur', 500), ('adipiscing', 500), ('quia', 400)
, ('dolor', 300), ('sit', 300), ('amet', 300), ('ipsum', 200), ('lorem', 100)]
ranks_from_scores ->
{'adipiscing': 2,
 'amet': 5,
 'consectetur': 2,
 'dolor': 5,
 'elit': 1,
 'ipsum': 8,
 'lorem': 9,
 'quia': 4,
 'sit': 5}

=== original submission ===

This code assumes that you really want the highest score to get rank 1, not the lowest score getting rank 1 (or 0!).

# coding: ascii

def ranks_from_scores(scores, debug=False):
    """scores (a mapping of object IDs to scores)
       return a mapping of object IDs to ranks
    """
    alist = [(v, k) for k, v in scores.items()]
    alist.sort(reverse=True)
    if debug: print 'alist:', alist
    bdict = {}
    previous_score = object()
    for posn, (score, obj_id) in enumerate(alist):
        if score != previous_score:
            previous_score = score
            rank = posn + 1
        bdict[obj_id] = rank
    if debug:
        print 'bdict:', bdict
        blist = [(v, k) for k, v in bdict.items()]
        print 'blist:', sorted(blist)
    return bdict

ranks_from_scores(
    {'q': 10, 'w': 20, 'e': 20, 'r': 20, 't': 30},
    debug=True,
    )

Output:

alist: [(30, 't'), (20, 'w'), (20, 'r'), (20, 'e'), (10, 'q')]
bdict: {'q': 5, 'r': 2, 'e': 2, 't': 1, 'w': 2}
blist: [(1, 't'), (2, 'e'), (2, 'r'), (2, 'w'), (5, 'q')]


The way to do this is not to calculate the element's position is some arbitrary sequence, but rather to calculate how many other elements have a better score.

EDIT:

By popular demand, O(n)'ed and everything:

positions = {}
cur_score = None # Score we're examining
cur_count = 0 # Number of others that we've seen with this score

for ix, (name, score) in enumerate(sorted_scores):
  if score == cur_score: # Same score for this player as previous
    cur_count += 1
  else: # Different score from before
    cur_score = score
    cur_count = 0
  positions[name] = ix - cur_count + 1 # Add 1 because ix is 0-based

print positions


Looks like you can use the sorted and enumerate builtins, the groupby method from itertools and the itemgetter method from operator. Assumes higher scores are better... (if lower scores are better, change reverse=True to reverse=False)

>>> from itertools import groupby
>>> from operator import itemgetter
>>> scores = {
...     'lorem': 100,
...     'ipsum': 200,
...     'dolor': 300,
...     'sit': 300,
...     'amet': 300,
...     'quia': 400,
...     'consectetur': 500,
...     'adipiscing': 500,
...     'elit': 600,
...     }
>>> sorted_items = sorted(scores.items(), key=itemgetter(1), reverse=True)
>>> groups = groupby(sorted_items, itemgetter(1))
>>> for rank, (score, items) in enumerate(groups):
...     print rank+1, map(itemgetter(0), items)
... 
1 ['elit']
2 ['consectetur', 'adipiscing']
3 ['quia']
4 ['dolor', 'sit', 'amet']
5 ['ipsum']
6 ['lorem']


Solution

Here's one simple way to do it by modifying your code a little rather than importing modules:

prev = None
rank = 0
incr = 1
positions = {}
for key, value in sorted_list:
    if value is not None:
        if value != prev:
            rank += incr
            incr = 1
        else:
            incr += 1
        positions[key] = rank
        prev = value

A Test

For

sorted_list = [
    ('Apolo Ohno', 0),
    ('Shanie Davis', -1),
    ('Bodie Miller', -2),
    ('Lindsay Vohn', -3),  
    ('Shawn White', -3),
    ('Bryan Veloso',-4)
]

I get positions as:

{'Apolo Ohno': 1, 
'Shanie Davis': 2,
 'Bodie Miller': 3,
 'Lindsay Vohn': 4,
 'Shawn White': 4,
 'Bryan Veloso': 6}

which I think is what you are going for even though you aren't quite clear about whether there should be a 6 after the two 4's.


Here's an approach that combines aspects of a few of the other solutions into a flexible generator function.

def rank_sorted(sequence, start=1, key=None, reverse=True):
    """A combination of `enumerate` and `sorted` iterators that deals
    with tied ranks.

    """
    previous_value = object()  # won't compare equal to anything
    sorted_iterator = sorted(sequence, key=key, reverse=reverse)
    for index, item in enumerate(sorted_iterator, start=start):

        # use key function to choose value if given
        if key is None:
            value = item
        else:
            value = key(item)

        # only update rank when sort value changes
        if value != previous_value:
            previous_value = value
            rank = index

        yield rank, item

You can call with different values for start, key, and reverse to allow for ranks to start at 0 or 1, to pass a custom key function (like itemgetter(1) for sorting dictionaries by value), and to easily switch to having lower scores considered higher ranking. Using the example in the original question:

from operator import itemgetter

sorted_scores = [
    ('Apolo Ohno', 0),
    ('Shanie Davis', -1),
    ('Bodie Miller', -2),
    ('Lindsay Vohn', -3),  
    ('Shawn White', -3),
    ('Bryan Veloso', -4)
]

higher_is_better = dict(
    (name, rank)
    for rank, (name, score)
    in rank_sorted(sorted_scores, key=itemgetter(1))
)
# {'Apolo Ohno': 1, 'Bryan Veloso': 6, 'Shanie Davis': 2, 'Lindsay Vohn': 4, 'Bodie Miller': 3, 'Shawn White': 4}

lower_is_better = dict(
    (name, rank)
    for rank, (name, score)
    in rank_sorted(sorted_scores, key=itemgetter(1), reverse=False)
)
# {'Apolo Ohno': 6, 'Bryan Veloso': 1, 'Shanie Davis': 5, 'Lindsay Vohn': 2, 'Bodie Miller': 4, 'Shawn White': 2}


Same as the best answer, just keeping the ordinals instead of skiping to the next position at the ranking: i.e.: ranking = [1,2,3,4,4,5] instead of ranking = [1,2,3,4,4,6]

    sorted_scores = [
     ('Apolo Ohno', 0),
     ('Shanie Davis', -1),
     ('Bodie Miller', -2),
     ('Lindsay Vohn', -3),  
     ('Shawn White', -3),
     ('Bryan Veloso',-4)
]

res = {}
prev = None
n = 0 
for k,v in sorted_scores:
    if v!=prev:
        n +=1 
        place,prev = n,v
    res[k] = place

print (res)
{'Apolo Ohno': 1, 'Shanie Davis': 2, 'Bodie Miller': 3, 'Lindsay Vohn': 4, 'Shawn White': 4, 'Bryan Veloso': 5}


>>> sorted_scores = [
...     ('Apolo Ohno', 0),
...     ('Shanie Davis', -1),
...     ('Bodie Miller', -2),
...     ('Lindsay Vohn', -3),  
...     ('Shawn White', -3),
...     ('Bryan Veloso',-4)
... ]
>>> 
>>> from itertools import groupby
>>> from operator import itemgetter
>>> 
>>> place=1
>>> res={}
>>> for _,items in groupby(sorted_scores,key=itemgetter(1)):
...     for i,item in enumerate(items):
...         res[item[0]]= place
...     place+=i+1
... 
>>> print res
{'Apolo Ohno': 1, 'Bryan Veloso': 6, 'Shanie Davis': 2, 'Lindsay Vohn': 4, 'Bodie Miller': 3, 'Shawn White': 4}

Remember that dicts are unordered, so to iterate in order of place, you need to do this

>>> print sorted(res.items(),key=itemgetter(1))
[('Apolo Ohno', 1), ('Shanie Davis', 2), ('Bodie Miller', 3), ('Lindsay Vohn', 4), ('Shawn White', 4), ('Bryan Veloso', 6)]


Here is a simple way to do it

last = None
position = 0
delta = 1
for key, value in sorted_list:
    if value is not None:
        if value != last:
            position += delta
            delta = 1
        else:
            delta += 1
        # i believe this is supposed to be [key] not [value] in OP's code
        positions[key] = position
        last = value


I'm having to make a bunch of assumptions about what it is you want to do, but here's an attempt:

scores = {
    'lorem': 100,
    'ipsum': 200,
    'dolor': 300,
    'sit': 300,
    'amet': 300,
    'quia': 400,
    'consectetur': 500,
    'adipiscing': 500,
    'elit': 600,
}

groups = {}
for (member, score) in scores.items():
    if score not in groups:
        groups[score] = [member]
    else:
        groups[score].append(member)

positions = {}
for (rank, (score, members)) in enumerate(groups.items()):
    for member in members:
        positions[member] = rank

Expanded for detail, to show the working:

>>> import pprint
>>> scores = {
...     'lorem': 100,
...     'ipsum': 200,
...     'dolor': 300,
...     'sit': 300,
...     'amet': 300,
...     'quia': 400,
...     'consectetur': 500,
...     'adipiscing': 500,
...     'elit': 600,
... }
>>> groups = {}
>>> for (member, score) in scores.items():
...     if score not in groups:
...         groups[score] = [member]
...     else:
...         groups[score].append(member)
...
>>> pprint.pprint(groups)
{100: ['lorem'],
 200: ['ipsum'],
 300: ['sit', 'dolor', 'amet'],
 400: ['quia'],
 500: ['consectetur', 'adipiscing'],
 600: ['elit']}
>>> positions = {}
>>> for (rank, (score, members)) in enumerate(groups.items()):
...     for member in members:
...         positions[member] = rank
...
>>> pprint.pprint(positions)
{'adipiscing': 4,
 'amet': 2,
 'consectetur': 4,
 'dolor': 2,
 'elit': 5,
 'ipsum': 1,
 'lorem': 0,
 'quia': 3,
 'sit': 2}
>>> pprint.pprint(sorted(positions.items(), key=lambda i: i[1]))
[('lorem', 0),
 ('ipsum', 1),
 ('sit', 2),
 ('dolor', 2),
 ('amet', 2),
 ('quia', 3),
 ('consectetur', 4),
 ('adipiscing', 4),
 ('elit', 5)]
0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号