python3之协同过滤综述

浏览: 1456

协同过滤的核心是计算用户与用户之间的距离(相似度)。下面用 Python 编写 recommendations 模块(保存为 recommendations.py),代码如下:

####数据集(用户,电影名字,评分)

# Sample data set: critic name -> {film title -> rating}.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}

###计算距离的函数(人与人之间的相似度,采用皮尔逊相关系数)

def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient between p1 and p2.

    Only items rated by both people are taken into account.

    Args:
        prefs: Mapping of person -> {item -> numeric rating}.
        p1: Key of the first person in ``prefs``.
        p2: Key of the second person in ``prefs``.

    Returns:
        The correlation as a float, or 0 when the two people share no
        rated items or when either person's shared ratings have no
        variance (the coefficient is undefined in that case).

    Raises:
        KeyError: If ``p1`` or ``p2`` is not a key of ``prefs``.
    """
    # Imported here so the function works without a file-level import.
    from math import sqrt

    # Items that both people have rated.
    shared = [item for item in prefs[p1] if item in prefs[p2]]
    n = len(shared)
    if n == 0:
        return 0

    ratings1 = [prefs[p1][item] for item in shared]
    ratings2 = [prefs[p2][item] for item in shared]

    # Plain sums, sums of squares and sum of products over shared items.
    sum1 = sum(ratings1)
    sum2 = sum(ratings2)
    sum1_sq = sum(r * r for r in ratings1)
    sum2_sq = sum(r * r for r in ratings2)
    p_sum = sum(a * b for a, b in zip(ratings1, ratings2))

    num = p_sum - (sum1 * sum2 / n)
    den_sq = (sum1_sq - sum1 * sum1 / n) * (sum2_sq - sum2 * sum2 / n)
    # Zero means no variance; a tiny negative value can only come from
    # floating-point rounding and would make sqrt() raise ValueError.
    if den_sq <= 0:
        return 0

    return num / sqrt(den_sq)

####推荐方法

def getRecommendations(prefs, person, similarity=sim_pearson):
    """Rank the items ``person`` has not rated, best first.

    Each candidate item is scored with the average of the other people's
    ratings, weighted by how similar each of them is to ``person``.

    Args:
        prefs: Mapping of person -> {item -> numeric rating}.
        person: Key in ``prefs`` to build recommendations for.
        similarity: Callable ``(prefs, a, b)`` returning a similarity
            score; people scoring zero or below are ignored.

    Returns:
        A list of ``(predicted_score, item)`` tuples sorted in
        descending order.
    """
    weighted_totals = {}
    weight_sums = {}

    for other in prefs:
        # Never compare the person with themselves.
        if other == person:
            continue

        weight = similarity(prefs, person, other)
        # Non-positive similarity contributes nothing.
        if weight <= 0:
            continue

        their_ratings = prefs[other]
        for item in their_ratings:
            # Skip anything the person already has a non-zero rating for.
            if item in prefs[person] and prefs[person][item] != 0:
                continue

            # Accumulate rating * similarity and the similarity itself.
            weighted_totals.setdefault(item, 0)
            weighted_totals[item] += their_ratings[item] * weight
            weight_sums.setdefault(item, 0)
            weight_sums[item] += weight

    # Normalise each total by the similarities that contributed to it.
    scored = [
        (weighted_totals[item] / weight_sums[item], item)
        for item in weighted_totals
    ]

    # Ascending sort followed by a reversal gives highest score first.
    scored.sort()
    return scored[::-1]


调用方法(需先将上面的代码保存为 recommendations.py):

import recommendations

# Ask for recommendations for 'Toby' using the module's critics data set.
t = recommendations.getRecommendations(
    recommendations.critics,
    'Toby',
)

print(t)

推荐 4
本文由 张聪 创作,采用 知识共享署名-相同方式共享 3.0 中国大陆许可协议 进行许可。
转载、引用前需联系作者,并署名作者且注明文章出处。
本站文章版权归原作者及原出处所有 。内容为作者个人观点, 并不代表本站赞同其观点和对其真实性负责。本站是一个个人学习交流的平台,并不用于任何商业目的,如果有任何问题,请及时联系我们,我们将根据著作权人的要求,立即更正或者删除有关内容。本站拥有对此声明的最终解释权。

1 个评论

闵科夫斯基距离?

要回复文章请先登录注册