66from __future__ import print_function # Python 3-compatible print syntax
77from __future__ import unicode_literals # Python 3-compatible string/encoding handling
88
9- import re , math , requests , json , string
9+ import re , math , string
1010from itertools import product
1111from inspect import getsourcefile
1212from os .path import abspath , join , dirname
1717C_INCR = 0.733
1818N_SCALAR = - 0.74
1919
20- REGEX_REMOVE_PUNCTUATION = re .compile ('[%s]' % re .escape (string .punctuation ))
20+ REGEX_REMOVE_PUNCTUATION = re .compile ('[{0}]' . format ( re .escape (string .punctuation ) ))
2121PUNC_LIST = ["." , "!" , "?" , "," , ";" , ":" , "-" , "'" , "\" " , "!!" ,
2222 "!!!" , "??" , "???" , "?!?" , "!?!" , "?!?!" , "!?!?" ]
2323NEGATE = ['aint' , 'arent' , 'cannot' , 'cant' , 'couldnt' , 'darent' , 'didnt' ,
@@ -103,7 +103,7 @@ def allcap_differential(words):
103103 if word .isupper ():
104104 allcap_words += 1
105105 cap_differential = len (words ) - allcap_words
106- if cap_differential > 0 and cap_differential < len (words ):
106+ if 0 < cap_differential < len (words ):
107107 is_different = True
108108 return is_different
109109
@@ -122,7 +122,7 @@ def scalar_inc_dec(word, valence, is_cap_diff):
122122 scalar = BOOSTER_DICT [word_lower ]
123123 if valence < 0 :
124124 scalar *= - 1
125- # check if word in ALLCAPS
125+ # check if word in ALL CAPS
126126 if word .isupper () and is_cap_diff :
127127 if valence > 0 :
128128 scalar += C_INCR
@@ -140,8 +140,8 @@ def __init__(self, text):
140140 if not isinstance (text , str ):
141141 text = str (text .encode ('utf-8' ))
142142 self .text = text
143- self .word_and_emoticons = self ._words_and_emoticons ()
144- self .is_cap_diff = allcap_differential (self .word_and_emoticons )
143+ self .words_and_emoticons = self ._words_and_emoticons ()
144+ self .is_cap_diff = allcap_differential (self .words_and_emoticons )
145145
146146 def _words_plus_punc (self ):
147147 """
@@ -211,12 +211,13 @@ def polarity_scores(self, text):
211211 sentitext = SentiText (text )
212212
213213 sentiments = []
214- words_and_emoticons = sentitext .word_and_emoticons
214+ words_and_emoticons = sentitext .words_and_emoticons
215215 for item in words_and_emoticons :
216216 valence = 0
217217 i = words_and_emoticons .index (item )
218218 if (i < len (words_and_emoticons ) - 1 and item .lower () == "kind" and \
219- words_and_emoticons [i + 1 ].lower () == "of" ) or item .lower () in BOOSTER_DICT :
219+ words_and_emoticons [i + 1 ].lower () == "of" ) or \
220+ item .lower () in BOOSTER_DICT :
220221 sentiments .append (valence )
221222 continue
222223
@@ -237,8 +238,10 @@ def sentiment_valence(self, valence, sentitext, item, i, sentiments):
237238 else :
238239 valence -= C_INCR
239240 for start_i in range (0 , 3 ):
240- if i > start_i and words_and_emoticons [i - (start_i + 1 )].lower () not in self .lexicon :
241- s = scalar_inc_dec (words_and_emoticons [i - (start_i + 1 )], valence , is_cap_diff )
241+ if i > start_i and words_and_emoticons [i - (start_i + 1 )].lower () \
242+ not in self .lexicon :
243+ s = scalar_inc_dec (words_and_emoticons [i - (start_i + 1 )], \
244+ valence , is_cap_diff )
242245 if start_i == 1 and s != 0 :
243246 s = s * 0.95
244247 if start_i == 2 and s != 0 :
@@ -262,10 +265,11 @@ def _least_check(self, valence, words_and_emoticons, i):
262265 """
263266 if i > 1 and words_and_emoticons [i - 1 ].lower () not in self .lexicon \
264267 and words_and_emoticons [i - 1 ].lower () == "least" :
265- if words_and_emoticons [i - 2 ].lower () != "at" and words_and_emoticons [i - 2 ].lower () != "very" :
268+ if words_and_emoticons [i - 2 ].lower () != "at" and \
269+ words_and_emoticons [i - 2 ].lower () != "very" :
266270 valence = valence * N_SCALAR
267- elif i > 0 and words_and_emoticons [i - 1 ].lower () not in self .lexicon \
268- and words_and_emoticons [i - 1 ].lower () == "least" :
271+ elif i > 0 and words_and_emoticons [i - 1 ].lower () not in self .lexicon and \
272+ words_and_emoticons [i - 1 ].lower () == "least" :
269273 valence = valence * N_SCALAR
270274 return valence
271275
@@ -331,24 +335,31 @@ def _never_check(self, valence, words_and_emoticons, start_i, i):
331335 valence = valence * N_SCALAR
332336 return valence
333337
334- def _punctuation_emphasis (self , sum_s , text ):
338+ def _punctuation_emphasis (self , text ):
335339 # add emphasis
336340 ep_amplifier = self ._amplify_ep (text )
337341 qm_amplifier = self ._amplify_qm (text )
338342 punc_emph_amplifier = ep_amplifier + qm_amplifier
339343 return punc_emph_amplifier
340344
341- def _amplify_eq (self , text ):
342- # check for added emphasis
345+ def _amplify_qm (self , text ):
343346 qm_count = text .count ("?" )
344- qm_amplifier = 0
347+ qm_amplifier = 0 ;
345348 if qm_count > 1 :
346349 if qm_count <= 3 :
347350 qm_amplifier = qm_count * 0.18
348351 else :
349352 qm_amplifier = 0.96
350353 return qm_amplifier
351354
355+ def _amplify_ep (self , text ):
356+ # check for added emphasis
357+ ep_count = text .count ("!" )
358+ if ep_count > 4 :
359+ ep_count = 4
360+ ep_amplifier = ep_count * 0.292
361+ return ep_amplifier
362+
352363 def _sift_sentiment_scores (self , sentiments ):
353364 # separate positive versus negative sentiment scores
354365 pos_sum = 0.0
@@ -366,7 +377,7 @@ def _sift_sentiment_scores(self, sentiments):
366377 def score_valence (self , sentiments , text ):
367378 if sentiments :
368379 sum_s = float (sum (sentiments ))
369- punct_emph_amplifier = self ._punctuation_emphasis (sum_s , text )
380+ punct_emph_amplifier = self ._punctuation_emphasis ( text )
370381 if sum_s > 0 :
371382 sum_s += punct_emph_amplifier
372383 elif sum_s < 0 :
@@ -393,5 +404,5 @@ def score_valence(self, sentiments, text):
393404 sentiment_dict = {"neg" : round (neg , 3 ),
394405 "neu" : round (neu , 3 ),
395406 "pos" : round (pos , 3 ),
396- "compund " : round (compound , 4 )}
407+ "compound " : round (compound , 4 )}
397408 return sentiment_dict
0 commit comments