Skip to content

Commit d4d77e5

Browse files
author
xuming
committed
更新情感分析测试用例.xuming 20170517
1 parent 84c31c5 commit d4d77e5

2 files changed

Lines changed: 34 additions & 23 deletions

File tree

08sentiment/Sentiment.py

Lines changed: 30 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from __future__ import print_function # 兼容python3的print写法
77
from __future__ import unicode_literals # 兼容python3的编码处理
88

9-
import re, math, requests, json, string
9+
import re, math, string
1010
from itertools import product
1111
from inspect import getsourcefile
1212
from os.path import abspath, join, dirname
@@ -17,7 +17,7 @@
1717
C_INCR = 0.733
1818
N_SCALAR = -0.74
1919

20-
REGEX_REMOVE_PUNCTUATION = re.compile('[%s]' % re.escape(string.punctuation))
20+
REGEX_REMOVE_PUNCTUATION = re.compile('[{0}]'.format(re.escape(string.punctuation)))
2121
PUNC_LIST = [".", "!", "?", ",", ";", ":", "-", "'", "\"", "!!",
2222
"!!!", "??", "???", "?!?", "!?!", "?!?!", "!?!?"]
2323
NEGATE = ['aint', 'arent', 'cannot', 'cant', 'couldnt', 'darent', 'didnt',
@@ -103,7 +103,7 @@ def allcap_differential(words):
103103
if word.isupper():
104104
allcap_words += 1
105105
cap_differential = len(words) - allcap_words
106-
if cap_differential > 0 and cap_differential < len(words):
106+
if 0 < cap_differential < len(words):
107107
is_different = True
108108
return is_different
109109

@@ -122,7 +122,7 @@ def scalar_inc_dec(word, valence, is_cap_diff):
122122
scalar = BOOSTER_DICT[word_lower]
123123
if valence < 0:
124124
scalar *= -1
125-
# check if word in ALLCAPS
125+
# check if word in ALL CAPS
126126
if word.isupper() and is_cap_diff:
127127
if valence > 0:
128128
scalar += C_INCR
@@ -140,8 +140,8 @@ def __init__(self, text):
140140
if not isinstance(text, str):
141141
text = str(text.encode('utf-8'))
142142
self.text = text
143-
self.word_and_emoticons = self._words_and_emoticons()
144-
self.is_cap_diff = allcap_differential(self.word_and_emoticons)
143+
self.words_and_emoticons = self._words_and_emoticons()
144+
self.is_cap_diff = allcap_differential(self.words_and_emoticons)
145145

146146
def _words_plus_punc(self):
147147
"""
@@ -211,12 +211,13 @@ def polarity_scores(self, text):
211211
sentitext = SentiText(text)
212212

213213
sentiments = []
214-
words_and_emoticons = sentitext.word_and_emoticons
214+
words_and_emoticons = sentitext.words_and_emoticons
215215
for item in words_and_emoticons:
216216
valence = 0
217217
i = words_and_emoticons.index(item)
218218
if (i < len(words_and_emoticons) - 1 and item.lower() == "kind" and \
219-
words_and_emoticons[i + 1].lower() == "of") or item.lower() in BOOSTER_DICT:
219+
words_and_emoticons[i + 1].lower() == "of") or \
220+
item.lower() in BOOSTER_DICT:
220221
sentiments.append(valence)
221222
continue
222223

@@ -237,8 +238,10 @@ def sentiment_valence(self, valence, sentitext, item, i, sentiments):
237238
else:
238239
valence -= C_INCR
239240
for start_i in range(0, 3):
240-
if i > start_i and words_and_emoticons[i - (start_i + 1)].lower() not in self.lexicon:
241-
s = scalar_inc_dec(words_and_emoticons[i - (start_i + 1)], valence, is_cap_diff)
241+
if i > start_i and words_and_emoticons[i - (start_i + 1)].lower() \
242+
not in self.lexicon:
243+
s = scalar_inc_dec(words_and_emoticons[i - (start_i + 1)], \
244+
valence, is_cap_diff)
242245
if start_i == 1 and s != 0:
243246
s = s * 0.95
244247
if start_i == 2 and s != 0:
@@ -262,10 +265,11 @@ def _least_check(self, valence, words_and_emoticons, i):
262265
"""
263266
if i > 1 and words_and_emoticons[i - 1].lower() not in self.lexicon \
264267
and words_and_emoticons[i - 1].lower() == "least":
265-
if words_and_emoticons[i - 2].lower() != "at" and words_and_emoticons[i - 2].lower() != "very":
268+
if words_and_emoticons[i - 2].lower() != "at" and \
269+
words_and_emoticons[i - 2].lower() != "very":
266270
valence = valence * N_SCALAR
267-
elif i > 0 and words_and_emoticons[i - 1].lower() not in self.lexicon \
268-
and words_and_emoticons[i - 1].lower() == "least":
271+
elif i > 0 and words_and_emoticons[i - 1].lower() not in self.lexicon and \
272+
words_and_emoticons[i - 1].lower() == "least":
269273
valence = valence * N_SCALAR
270274
return valence
271275

@@ -331,24 +335,31 @@ def _never_check(self, valence, words_and_emoticons, start_i, i):
331335
valence = valence * N_SCALAR
332336
return valence
333337

334-
def _punctuation_emphasis(self, sum_s, text):
338+
def _punctuation_emphasis(self, text):
335339
# add emphasis
336340
ep_amplifier = self._amplify_ep(text)
337341
qm_amplifier = self._amplify_qm(text)
338342
punc_emph_amplifier = ep_amplifier + qm_amplifier
339343
return punc_emph_amplifier
340344

341-
def _amplify_eq(self, text):
342-
# check for added emphasis
345+
def _amplify_qm(self, text):
343346
qm_count = text.count("?")
344-
qm_amplifier = 0
347+
qm_amplifier = 0;
345348
if qm_count > 1:
346349
if qm_count <= 3:
347350
qm_amplifier = qm_count * 0.18
348351
else:
349352
qm_amplifier = 0.96
350353
return qm_amplifier
351354

355+
def _amplify_ep(self, text):
356+
# check for added emphasis
357+
ep_count = text.count("!")
358+
if ep_count > 4:
359+
ep_count = 4
360+
ep_amplifier = ep_count * 0.292
361+
return ep_amplifier
362+
352363
def _sift_sentiment_scores(self, sentiments):
353364
# separate positive versus negative sentiment scores
354365
pos_sum = 0.0
@@ -366,7 +377,7 @@ def _sift_sentiment_scores(self, sentiments):
366377
def score_valence(self, sentiments, text):
367378
if sentiments:
368379
sum_s = float(sum(sentiments))
369-
punct_emph_amplifier = self._punctuation_emphasis(sum_s, text)
380+
punct_emph_amplifier = self._punctuation_emphasis( text)
370381
if sum_s > 0:
371382
sum_s += punct_emph_amplifier
372383
elif sum_s < 0:
@@ -393,5 +404,5 @@ def score_valence(self, sentiments, text):
393404
sentiment_dict = {"neg": round(neg, 3),
394405
"neu": round(neu, 3),
395406
"pos": round(pos, 3),
396-
"compund": round(compound, 4)}
407+
"compound": round(compound, 4)}
397408
return sentiment_dict

08sentiment/sentiment_demo.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,11 @@
55
"""
66
from __future__ import print_function # 兼容python3的print写法
77
from __future__ import unicode_literals # 兼容python3的编码处理
8-
# from Sentiment import SentimentIntensityAnalyzer
9-
from nltk.sentiment import SentimentIntensityAnalyzer
8+
from Sentiment import SentimentIntensityAnalyzer
9+
# from nltk.sentiment import SentimentIntensityAnalyzer
1010

11-
sentences = ["lilei is smart,handsome,and funny boy.",
12-
"lilei is not smart ,handsome,nor good boy."]
11+
sentences = ["lilei is smart, handsome, and funny boy.",
12+
"lilei is not smart , handsome, nor good boy."]
1313
analyzer = SentimentIntensityAnalyzer()
1414
for sentence in sentences:
1515
vs = analyzer.polarity_scores(sentence)

0 commit comments

Comments
 (0)