33# Brief:
44import time
55from eclat import eclat_zc
6- from freq_utils import loadDblpData , loadData , loadUnixData
6+ from freq_utils import loadDblpData , load_title_data , printDataSet , save_freqItems
77from apriori import apriori_zc
88from fp_growth import fp_growth
99
10+
1011def test_fp_growth (minSup , dataSetDict , dataSet ):
1112 freqItems = fp_growth (dataSetDict , minSup )
1213 freqItems = sorted (freqItems .items (), key = lambda item : item [1 ])
@@ -32,6 +33,7 @@ def print_freqItems(logo, freqItems):
3233 print (len (freqItems ))
3334 print ("-------------------" , logo , " end ---------------" )
3435
36+
3537def do_experiment_data_size ():
3638 data_name = 'unixData8_pro.txt'
3739 x_name = "Data_Size"
@@ -151,7 +153,7 @@ def do_test():
151153def do_dblp_data ():
152154 data_name = 'dblpDataAll.txt'
153155 x_name = "Min_Support"
154- data_num = 2715700
156+ data_num = 980
155157 minSup = 100
156158 dataSetDict , dataSet = loadDblpData (("dataSet/" + data_name ), ',' , data_num )
157159
@@ -165,9 +167,27 @@ def do_dblp_data():
165167 print (item )
166168
167169
168- if __name__ == '__main__' :
169- x_value , y_value = do_experiment_min_support ()
170- x_value , y_value = do_experiment_data_size ()
171- do_test ()
def do_title_data():
    """Run Eclat on the title data set, time the run, and save the result.

    Passes the path ``dataSet/title.txt`` to ``load_title_data``, previews the
    first ten entries of the returned data set with ``printDataSet``, runs
    ``test_eclat`` with a minimum support of ``data_num / 100``, prints the
    elapsed wall-clock seconds followed by the first ten results, and finally
    hands the results to ``save_freqItems`` together with the output path
    ``dataSet/title_out.txt``.
    """
    data_name = 'title.txt'
    data_num = 22846
    # NOTE(review): presumably meant as 1% of the records. "/" yields a float
    # on Python 3 (228.46) -- confirm the miner accepts a non-integer threshold.
    minSup = data_num / 100
    dataSetDict, dataSet = load_title_data("dataSet/" + data_name, ',', data_num)
    printDataSet(dataSet[:10])

    # Only the mining call is timed; loading and saving are excluded.
    start = time.time()
    freqItems_eclat = test_eclat(minSup, dataSetDict, dataSet)
    time_eclat = time.time() - start
    print(time_eclat)

    print(freqItems_eclat[:10])
    save_freqItems(freqItems_eclat, "dataSet/title_out.txt")
172185
186+
if __name__ == '__main__':
    # Script entry point. Only the DBLP run below is enabled; the other
    # experiment entry points are kept here, commented out, so that one can be
    # switched on at a time.
    # x_value, y_value = do_experiment_min_support()
    # x_value, y_value = do_experiment_data_size()
    # do_test()
    #
    do_dblp_data()
    # do_title_data()
0 commit comments