关联规则算法学习—Apriori
一、实验项目:关联规则算法学习
项目性质:设计型
二、实验目的:理解并掌握关联规则经典算法Apriori算法,理解算法的原理,能够实现算法,并对给定的数据集进行关联规则挖掘。
三、实验内容:
1、实现Apriori算法,验证算法的正确性,并将算法应用于给定的数据集Groceries,根据设定的支持度和置信度,挖掘出符合条件的频繁项集及关联规则。
2、挑选几个有代表性的频繁项集和关联规则,记录下来。
3、调节支持度和置信度阈值,重新执行算法,比较结果的不同。
# coding=utf-8
import pandas as pd
import numpy as np


def getDataSet(path='Groceries.csv'):
    """Load the transactions from a CSV file.

    Args:
        path: CSV file to read. Defaults to ``'Groceries.csv'``.

    Returns:
        A ``(data, columns)`` tuple. ``data`` is a list of transactions, each
        a list of item strings taken from the second column of the file;
        ``columns`` is the list of column names.
    """
    DataSet = pd.read_csv(path, encoding='UTF-8')
    rows = np.array(DataSet).tolist()
    columns = np.array(DataSet.columns).tolist()
    # Only the second column (the item list) is kept; the first column is
    # dropped.
    # NOTE(review): this assumes items inside a cell are separated by single
    # spaces -- confirm against the actual layout of the CSV file.
    data = [str(row[1]).split(' ') for row in rows]
    return data, columns


def createItems(dataSet):
    """Build the candidate 1-itemsets (C1).

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        An iterator of single-element frozensets, one per distinct item,
        in ascending item order.
    """
    # A set gives O(1) de-duplication instead of scanning a list per item.
    distinct = {item for transaction in dataSet for item in transaction}
    # Wrap every item in a list so that string items are not split into
    # characters by frozenset().
    return map(frozenset, [[item] for item in sorted(distinct)])


def createSupportItem(D, Items, MinSupport):
    """Count candidate itemsets and keep those meeting the support threshold.

    Args:
        D: iterable of transactions (sets).
        Items: iterable of candidate itemsets (frozensets).
        MinSupport: minimum support as a fraction of the transaction count.

    Returns:
        A ``(supportItems, supportData)`` tuple. ``supportItems`` lists the
        candidates whose support is at least ``MinSupport``;
        ``supportData`` maps every candidate that occurs in at least one
        transaction to its support, including the infrequent ones.
    """
    transactions = list(D)
    candidates = list(Items)
    total = float(len(transactions))

    counts = {}
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                counts[candidate] = counts.get(candidate, 0) + 1

    supportItems = []
    supportData = {}
    for candidate, count in counts.items():
        support = count / total
        if support >= MinSupport:
            supportItems.append(candidate)
        supportData[candidate] = support
    # The result is ordered last-counted first (what repeated insert(0, ...)
    # would produce), built in linear time.
    supportItems.reverse()
    return supportItems, supportData


def AprioriConf(Lk, k):
    """Generate candidate k-itemsets from the frequent (k-1)-itemsets.

    Two itemsets are joined when their first ``k - 2`` items, taken in
    sorted order, are identical.

    Args:
        Lk: list of frequent (k-1)-itemsets (frozensets).
        k: size of the itemsets to create.

    Returns:
        List of candidate k-itemsets (frozensets).
    """
    # Sort BEFORE slicing: a frozenset has no defined iteration order, so
    # slicing the raw list would compare arbitrary items instead of the
    # common prefix, missing valid candidates or joining unrelated sets.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    retList = []
    for i in range(len(Lk)):
        for j in range(i + 1, len(Lk)):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList


def Apriori(dataSet, minSupport):
    """Mine all frequent itemsets of ``dataSet`` with the Apriori algorithm.

    Args:
        dataSet: list of transactions, each an iterable of items.
        minSupport: minimum support as a fraction of the transaction count.

    Returns:
        A ``(L, supportData)`` tuple. ``L[i]`` is the list of frequent
        (i+1)-itemsets and the last entry of ``L`` is always an empty list;
        ``supportData`` maps each counted itemset to its support.
    """
    # Convert the transactions to sets once instead of once per level.
    transactions = [set(transaction) for transaction in dataSet]
    L1, supportData = createSupportItem(
        transactions, createItems(dataSet), minSupport)
    L = [L1]
    k = 2
    # Stop as soon as a level yields no frequent itemsets.
    while L[k - 2]:
        Ck = AprioriConf(L[k - 2], k)
        Lk, Supk = createSupportItem(transactions, Ck, MinSupport=minSupport)
        supportData.update(Supk)
        L.append(Lk)
        k += 1
    return L, supportData


def main():
    """Run Apriori on the grocery data and print itemsets and supports."""
    # dataSet holds only the item lists, not the order ids.
    dataSet, _columns = getDataSet()
    L, Support = Apriori(dataSet, 0.5)
    print('所有频繁项集L:')
    for level in L:
        print(level)
    print('对应支持度Support:')
    for k, v in Support.items():
        print('项目集:', k, '的支持度为:', v)


if __name__ == '__main__':
    main()
运行结果: