forked from heqin-zhu/algorithm
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmarkov.py
More file actions
58 lines (52 loc) · 1.55 KB
/
markov.py
File metadata and controls
58 lines (52 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
''' mbinary
#########################################################################
# File : markov.py
# Author: mbinary
# Mail: [email protected]
# Blog: https://mbinary.xyz
# Github: https://github.com/mbinary
# Created Time: 2018-07-06 15:57
# Description: Word-level first-order Markov chain text generator.
#########################################################################
'''
from random import randint
import re
class markov:
    """Word-level first-order Markov chain text generator.

    The input text is tokenized into words, a table of
    "word -> {following word: occurrence count}" is built from every pair of
    adjacent tokens, and sentences are generated by repeatedly sampling a
    follower with probability proportional to its count.

    Attributes:
        words: list of tokens produced by ``clean``.
        dic: transition table produced by ``getDic``.
    """

    def __init__(self, txt):
        """Tokenize *txt* and build the transition table from it."""
        self.words = self.clean(txt)
        self.dic = self.getDic(self.words)

    def clean(self, text):
        """Split *text* into a list of word tokens.

        Double quotes are removed. A space is inserted after each of
        ``, . ; :`` so that a punctuation mark stays attached to the word
        before it (and is therefore kept in the chain) while still being
        separated from whatever follows it.

        Returns:
            A list of non-empty tokens; an empty list for blank input.
        """
        text = text.replace("\"", "")
        # Keep every punctuation mark glued to the preceding word so that it
        # is not discarded and remains part of the Markov chain.
        punctuation = [',', '.', ';', ':']
        for symbol in punctuation:
            text = text.replace(symbol, symbol + " ")
        # str.split() with no separator splits on any whitespace run
        # (including newlines) and never yields empty strings.  Splitting on
        # the regex ' +' produced a '' token whenever the text started or
        # ended with a space -- which is always the case when the text ends
        # with punctuation, because of the padding added above.
        return text.split()

    def getDic(self, words):
        """Build the transition table for the token list *words*.

        Returns:
            A dict mapping each token that has at least one follower to a
            dict of ``{follower: number of times it followed}``.
        """
        dic = {}
        # Walk over every pair of adjacent tokens.
        for prev, cur in zip(words, words[1:]):
            followers = dic.setdefault(prev, {})
            followers[cur] = followers.get(cur, 0) + 1
        return dic

    def getSum(self, dic):
        """Return the total of all counts in the follower table *dic*.

        The table is not modified: the total is not cached inside *dic*,
        so the table only ever contains real followers.
        """
        return sum(dic.values())

    def nextWord(self, word):
        """Randomly pick a follower of *word*, weighted by occurrence count.

        Raises:
            KeyError: if *word* has no recorded follower.
        """
        followers = self.dic[word]
        # Draw a position in 1..total and walk the counts until it is
        # covered; each follower is chosen with probability count / total.
        k = randint(1, self.getSum(followers))
        for candidate, count in followers.items():
            k -= count
            if k <= 0:
                return candidate

    def genSentence(self, begin='I', length=30):
        """Generate a space-joined sentence of at most *length* words.

        The sentence always starts with *begin*. Generation stops early when
        the current word has no recorded follower (for example the last word
        of the source text, or a *begin* that never occurs in it) instead of
        raising ``KeyError``.
        """
        li = [begin]
        current = begin
        for _ in range(1, length):
            if current not in self.dic:
                # Dead end: nothing ever followed this word in the corpus.
                break
            current = self.nextWord(current)
            li.append(current)
        return ' '.join(li)