forked from shibing624/python-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path02.tagger_performance.py
More file actions
33 lines (26 loc) · 1.02 KB
/
02.tagger_performance.py
File metadata and controls
33 lines (26 loc) · 1.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# -*- coding: utf-8 -*-
"""
@description: user define tagger performance
@author:XuMing
"""
from __future__ import print_function # 兼容python3的print写法
from __future__ import unicode_literals # 兼容python3的编码处理
import nltk
from nltk.corpus import brown
# user define tagger performance
def performance(cfd, wordlist):
lt = dict((word, cfd[word].max()) for word in wordlist)
baseline_tagger = nltk.UnigramTagger(model=lt, backoff=nltk.DefaultTagger('NN'))
return baseline_tagger.evaluate(brown.tagged_sents(categories='news'))
def display():
import pylab
words_by_freq = list(nltk.FreqDist(brown.words(categories='news')))
cfd = nltk.ConditionalFreqDist(brown.tagged_words(categories='news'))
sizes = 2 ** pylab.arange(15)
perfs = [performance(cfd, words_by_freq[:size]) for size in sizes]
pylab.plot(sizes, perfs, '-bo')
pylab.title("lookup tagger performance with varying model size")
pylab.xlabel('model size')
pylab.ylabel('performance')
pylab.show()
display()