File tree Expand file tree Collapse file tree
# -*- coding: utf-8 -*-
# Author: XuMing <[email protected]>
# Brief: TF-IDF demo - fit scikit-learn's TfidfVectorizer on a small corpus and print per-document term weights
4+
from sklearn.feature_extraction.text import TfidfVectorizer

# Small demo corpus; each string is treated as one document.
corpus = ["I come to China to travel",
          "This is a car polupar in China",
          "I love tea and Apple ",
          "The work is to write some papers in science"]

vectorizer = TfidfVectorizer()

# Learn the vocabulary and IDF from the corpus and return the sparse
# document-term TF-IDF matrix.
tfidf = vectorizer.fit_transform(corpus)
print(tfidf)
print('vocab:')
print(vectorizer.vocabulary_)

# All terms of the fitted vocabulary, ordered by column index.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2,
# so prefer get_feature_names_out() and only fall back on older releases.
if hasattr(vectorizer, "get_feature_names_out"):
    word = vectorizer.get_feature_names_out()
else:
    word = vectorizer.get_feature_names()

# Dense copy of the TF-IDF matrix: weight[i][j] is the TF-IDF weight of
# term j in document i.
weight = tfidf.toarray()

# Outer loop walks the documents, inner loop walks the terms of one document.
for i, doc_weights in enumerate(weight):
    print(u"-------这里输出第", i, u"类文本的词语tf-idf权重------")
    k_v = dict()
    for term, score in zip(word, doc_weights):
        print(term, score)
        k_v[term] = score
    # Show the five highest-weighted terms of this document.
    sorts = sorted(k_v.items(), key=lambda d: d[1], reverse=True)
    print(sorts[:5])
You can’t perform that action at this time.
0 commit comments