d1 = {'weight':1, 'data': { 'apples': 8, 'oranges': 7 } }
d2 = {'weight':3, 'data': { 'apples': 4, 'bananas': 3 } }
all_dictionaries = [d1, d2, ... ]
def mergeDictionariesWithWeight(all_dictionaries)
How do I merge these dictionaries together (if a key appears in more than one dictionary, multiply each value by its dictionary's weight and combine them)?
The function would return:
{ 'apples': 5, 'oranges': 7, 'bananas': 3 }
Apples is 5
because 8 * .25 + 4 * .75 = 5
Edit: I just wrote one that takes the average, something like this. But of course it's really different from what I want to do, because I stick everything in a list and just divide by the length.
def averageDictionaries(dlist):
    """Merge flat dictionaries by averaging the values that share a key.

    This is the unweighted version: every value seen for a key is collected
    in a list and the result is the plain arithmetic mean of that list.

    Args:
        dlist: iterable of dictionaries mapping keys to values that
            ``int()`` accepts.

    Returns:
        A dict mapping every key found in any input dictionary to the mean
        of its values, always as a float.
    """
    # Gather every value observed for each key.
    keymap = {}
    for the_dict in dlist:
        for k, v in the_dict.items():
            # setdefault replaces the Python-2-only ``has_key`` check.
            keymap.setdefault(k, []).append(v)

    result = {}
    for k, values in keymap.items():
        # Values are coerced with int() before summing, as in the original
        # snippet; dividing by a float keeps the mean from being truncated.
        result[k] = sum(int(x) for x in values) / float(len(values))
    return result
>>> from collections import defaultdict
>>> d=defaultdict(lambda:(0,0))
>>> for D in all_dictionaries:
...     weight = D['weight']
...     for k,v in D['data'].items():
...         d[k]=d[k][0]+weight*v,d[k][1]+weight
...
>>> dict((k,v[0]/v[1]) for k,v in d.items())
{'apples': 5, 'oranges': 7, 'bananas': 3}
If you need floating point result
>>> dict((k,1.*v[0]/v[1]) for k,v in d.items())
{'apples': 5.0, 'oranges': 7.0, 'bananas': 3.0}
Notes about defaultdict
Often you see defaultdict(int) or defaultdict(list), maybe even defaultdict(set). The argument to defaultdict must be callable with no parameters. The result of calling it is used whenever a key is found to be missing, i.e. calling it returns the default value for the dictionary
for example
>>> d=defaultdict(int)
>>> d[1]
0
>>> d['foo']
0
This is often used for counting things up because int() returns 0. If you want the default value to be 1 instead of 0, it's trickier because you can't pass a parameter to int, but all you need is a callable that returns 1. This can be accomplished without too much fuss by using a lambda function.
>>> d=defaultdict(lambda:1)
>>> d[1]
1
>>> d['foo']
1
In this answer, I want to keep track of the weighted total, and the total of the weights. I can do this by using a 2-tuple as the default value.
>>> d=defaultdict(lambda:(0,0))
>>> d[1]
(0, 0)
>>> d['foo']
(0, 0)
Here's a solution that first gathers the items into lists using a temporary dict, and then computes the final weighted dict. It can probably be done without a temporary, but this is easy to understand.
from collections import defaultdict
def mergeDictionariesWithWeight(dlist):
    """Merge weighted dictionaries into one dict of weighted averages.

    Args:
        dlist: iterable of dicts, each with a ``'weight'`` entry (a number)
            and a ``'data'`` entry (a dict mapping keys to numeric values).

    Returns:
        A dict with one entry per key found in any ``'data'`` dict. A key
        that occurs in only one input keeps its value unchanged (the weight
        is ignored); a key that occurs in several inputs maps to the
        weighted average of its values, as a float.

    Raises:
        ZeroDivisionError: if a key occurs in several inputs whose weights
            sum to zero.
    """
    # First pass: group the (weight, value) pairs by key.
    tmp = defaultdict(list)
    for d in dlist:
        weight = d['weight']
        for k, v in d['data'].items():
            tmp[k].append((weight, v))

    # Second pass: collapse each group into a single number.
    r = {}
    for k, pairs in tmp.items():
        if len(pairs) == 1:
            # If there's just one item, ignore the weight
            r[k] = pairs[0][1]
        else:
            # Starting the sum at 0.0 forces float division below even when
            # every weight is an integer.
            total_weight = sum((w for w, _ in pairs), 0.0)
            r[k] = sum(v * w / total_weight for w, v in pairs)
    return r
Returns: {'apples': 5.0, 'oranges': 7, 'bananas': 3} (because 8 * .25 + 4 * .75 = 5.0)
try this:
def mergeDictionariesWithWeight(all_dictionaries):
    """Merge weighted dictionaries into one dict of weighted averages.

    Args:
        all_dictionaries: iterable of dicts, each with a ``'weight'`` entry
            (a number) and a ``'data'`` entry (a dict mapping keys to
            numeric values).

    Returns:
        A dict mapping every key found in any ``'data'`` dict to the
        weighted average of its values, as a float. Each key is normalized
        only by the weights of the dictionaries that actually contain it.

    Raises:
        ZeroDivisionError: if the weights contributing to a key sum to zero.
    """
    weightDictionary = {}  # key -> running sum of weight * value
    weightSum = {}         # key -> running sum of the weights seen for key
    for dictionary in all_dictionaries:
        weight = dictionary['weight']
        data = dictionary['data']
        # accumulate the weighted value and the weight for each key;
        # .get() lets a key be inserted the first time it is seen
        for (k, v) in data.items():
            weightDictionary[k] = weightDictionary.get(k, 0) + weight * v
            weightSum[k] = weightSum.get(k, 0) + weight
    # normalize the results by dividing by each key's own weight sum
    for (key, total) in weightSum.items():
        weightDictionary[key] = weightDictionary[key] / float(total)
    return weightDictionary


d1 = {'weight': 1, 'data': {'apples': 8, 'oranges': 7}}
d2 = {'weight': 3, 'data': {'apples': 4, 'bananas': 3}}
all_dictionaries = [d1, d2]
mergeDictionariesWithWeight(all_dictionaries)
from collections import defaultdict
def merge_dictionaries_with_weight(all_dictionaries):
    """Merge weighted dictionaries into a mapping of weighted averages.

    Args:
        all_dictionaries: iterable of dicts, each with a ``'weight'`` entry
            (anything ``float()`` accepts) and a ``'data'`` entry (a dict
            mapping keys to numeric values).

    Returns:
        A ``defaultdict(int)`` mapping every key found in any ``'data'``
        dict to the weighted average of its values, as a float. Because the
        result is a defaultdict, looking up an absent key yields 0 and
        inserts it.

    Raises:
        ZeroDivisionError: if the weights contributing to a key sum to zero.
    """
    totals = defaultdict(int)  # key -> sum of the weights seen for key
    result = defaultdict(int)  # key -> sum of weight * value, then average
    for each in all_dictionaries:
        # Converting once to float makes the final division a true division.
        weight = float(each['weight'])
        for key, value in each['data'].items():
            totals[key] += weight
            result[key] += weight * value
    # Turn each weighted sum into a weighted average in place.
    for key, total in totals.items():
        result[key] /= total
    return result
Algorithmically indistinguishable from gnibbler's, but somehow the generator expression pleases me.
>>> from collections import defaultdict
>>> weights, values = defaultdict(int), defaultdict(int)
>>> key_weight_value = ((key, d['weight'], value)
...                     for d in all_dictionaries
...                     for key, value in d['data'].iteritems())
>>> for k, w, v in key_weight_value:
...     weights[k], values[k] = weights[k] + w, values[k] + w * v
...
>>> dict((k, values[k] * 1.0 / weights[k]) for k in weights)
{'apples': 5.0, 'oranges': 7.0, 'bananas': 3.0}
精彩评论