python201cx
diff --git a/‎10web-server/flask_api_demo/flask-restful_demo.py‎
Lines changed: 59 additions & 0 deletions b/‎10web-server/flask_api_demo/flask-restful_demo.py‎
Lines changed: 59 additions & 0 deletions
diff --git a/‎10web-server/flask_api_demo/login_weibo_demo.py‎
Lines changed: 185 additions & 0 deletions b/‎10web-server/flask_api_demo/login_weibo_demo.py‎
Lines changed: 185 additions & 0 deletions
diff --git a/‎11scikit-learn/text_cluster/__init__.py‎
Lines changed: 7 additions & 0 deletions b/‎11scikit-learn/text_cluster/__init__.py‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎11scikit-learn/text_cluster/feature.py‎
Lines changed: 140 additions & 0 deletions b/‎11scikit-learn/text_cluster/feature.py‎
Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+"""
+@author:XuMing（[email protected])
+@description: Minimal Flask-RESTful demo with a hello-world resource and an in-memory todo resource.
+"""
+
+from flask import Flask, request
+from flask_restful import Api, Resource
+
+app = Flask(__name__)
+# Create the Api object, passing the Flask app as its argument
+api = Api(app)
+
+# Welcome carries the greeting data (the original note says the framework can
+# serialize objects of any type)
+class Welcome:
+    """Value object holding a visitor's name and the greeting built from it."""
+
+    def __init__(self, name):
+        greeting_template = "Hello %s, Welcome to flask-restaction!"
+        self.message = greeting_template % name
+        self.name = name
+
+# Define a Hello class that exposes a get method.
+# NOTE(review): Hello does not subclass Resource and is not registered with
+# ``api`` anywhere in the visible code -- presumably left over from a
+# flask-restaction example; confirm before relying on it.
+class Hello:
+    """Hello world"""
+
+    # The get method's docstring describes the input parameters and the output format
+    def get(self, name):
+        """
+        Get welcome message
+
+        $input:
+            name?str&default="world": Your name
+        $output:
+            message?str: Welcome message
+        """
+        return Welcome(name)
+
+class HelloWorld(Resource):
+    """Resource whose GET handler answers with a fixed greeting."""
+
+    def get(self):
+        """Return the constant ``{'hello': 'world'}`` payload."""
+        greeting = {'hello': 'world'}
+        return greeting
+
+# Serve HelloWorld at the root URL
+api.add_resource(HelloWorld, '/')
+
+
+
+# In-memory todo store shared by the TodoSimple handlers: todo_id -> stored value
+todos = {}
+
+class TodoSimple(Resource):
+    """Read and write single entries of the module-level ``todos`` dict."""
+
+    def get(self, todo_id):
+        """Return ``{todo_id: value}``, or a 404 payload when the id is unknown."""
+        # An unknown id used to raise KeyError, which surfaced as an HTTP 500.
+        if todo_id not in todos:
+            return {'message': 'Todo {0} does not exist'.format(todo_id)}, 404
+        return {todo_id: todos[todo_id]}
+
+    def put(self, todo_id):
+        """Store the ``data`` form field under ``todo_id`` and echo the entry."""
+        todos[todo_id] = request.form['data']
+        return {todo_id: todos[todo_id]}
+
+# Serve TodoSimple at /<todo_id>; the path segment is passed to the handlers as a string
+api.add_resource(TodoSimple, '/<string:todo_id>')
+
+
+if __name__ == '__main__':
+    # NOTE(review): debug=True is intended for local development only
+    app.run(debug=True)
@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*-
+"""
+@author:XuMing（[email protected])
+@description: Flask demo of Weibo OAuth2 login that stores the user's profile in MySQL and shares a status.
+"""
+
+
+import json
+from datetime import datetime
+
+import pymysql
+import requests
+from flask import Flask, redirect, request
+
+# Flask application that the route handlers below are registered on
+app = Flask(__name__)
+
+
+
+class ApiError(Exception):
+    """Error reported by the remote API inside an otherwise successful response.
+
+    :param code: error status code taken from the response payload
+    :param msg: error message taken from the response payload
+    """
+
+    def __init__(self, code, msg):
+        # Forward both values to Exception so that ``args``, ``repr()`` and
+        # pickling carry them; with an empty ``args`` the exception could not
+        # be re-created from a pickle.
+        super(ApiError, self).__init__(code, msg)
+        # Error status code
+        self.code = code
+        # Error message
+        self.msg = msg
+
+    def __str__(self):
+        return '{0}:{1}'.format(self.code, self.msg)
+
+
+class ServerError(Exception):
+    """Raised by ``WeiboClient.fetch`` when the HTTP status code is 400 or above."""
+    pass
+
+class WeiboClient(object):
+    """Small client for the Weibo OAuth2 and REST endpoints used by this demo."""
+
+    API_URL = 'https://api.weibo.com/'  # Base URL of the Weibo API
+    # Redirect URI sent with both the authorize and the access_token request
+    DEFAULT_REDIRECT_URI = 'http://test.baidu.com'
+
+    def __init__(self, client_id, client_secret):
+        self.client_id = client_id  # Application id
+        self.client_secret = client_secret  # Application secret
+        self.token = {}  # Cached token payload; empty until get_token succeeds
+
+    @property
+    def access_token(self):
+        """Return the cached access token, or ``None`` when no token is cached."""
+        if self.token:
+            return self.token['access_token']
+        return None
+
+    def _build_url(self, path, query):
+        """Return ``API_URL + path`` followed by the URL-encoded ``query`` pairs."""
+        # Local import so the block does not depend on a new file-level import.
+        from urllib.parse import urlencode
+        return '{0}{1}?{2}'.format(self.API_URL, path, urlencode(query))
+
+    def fetch(self, method, url, params=None):
+        """
+        Single entry point for all API requests.
+
+        :param method: ``'POST'`` sends ``params`` as form data; any other
+            value issues a GET with ``params`` as the query string
+        :param url: absolute URL to request
+        :param params: optional dict of request parameters
+        :return: the decoded JSON payload on a 2xx response without an
+            ``error_code`` field; ``None`` in every other case. Failures are
+            reported on stdout instead of being raised (best effort).
+        """
+        try:
+            if method == 'POST':
+                resp = requests.post(url, data=params)
+            else:
+                resp = requests.get(url, params=params)
+
+            if 200 <= resp.status_code < 300:
+                # The HTTP call itself succeeded; the payload may still carry an error
+                rest = resp.json()
+                if 'error_code' in rest:
+                    raise ApiError(rest['error_code'], rest['error'])
+                return rest
+            elif resp.status_code >= 400:
+                raise ServerError('HTTP {0}'.format(resp.status_code))
+        except ApiError as e:
+            print('ApiError', e)
+        except ServerError as e:
+            print('ServerError', e)
+        except Exception as e:
+            print('Exception', e)
+        return None
+
+    def get_ticket_url(self, redirect_uri=None):
+        """
+        Build the URL the browser is sent to in order to obtain an
+        authorization code (which is later exchanged for a token).
+
+        :param redirect_uri: callback URI; defaults to ``DEFAULT_REDIRECT_URI``
+        :return: the authorize URL with all query values URL-encoded
+        """
+        if redirect_uri is None:
+            redirect_uri = self.DEFAULT_REDIRECT_URI
+        # NOTE(review): get_token always sends DEFAULT_REDIRECT_URI, so a custom
+        # redirect_uri here will presumably not match at token exchange -- confirm.
+        return self._build_url('oauth2/authorize', [
+            ('client_id', self.client_id),
+            ('response_type', 'code'),
+            ('redirect_uri', redirect_uri),
+        ])
+
+    def get_token(self, code):
+        """
+        Exchange an authorization code for a token payload and cache it.
+
+        :param code: authorization code received on the callback
+        :return: the token payload (a dict); the cached one if already present
+        :raises ServerError: when the token request did not yield a payload
+        """
+        # Reuse the cached token if there is one
+        if self.token:
+            return self.token
+        url = self._build_url('oauth2/access_token', [
+            ('client_id', self.client_id),
+            ('client_secret', self.client_secret),
+            ('grant_type', 'authorization_code'),
+            ('redirect_uri', self.DEFAULT_REDIRECT_URI),
+            ('code', code),
+        ])
+        # fetch already returns the decoded JSON (or None); calling .json() on
+        # its result, as before, could never succeed.
+        token = self.fetch('POST', url)
+        if not token:
+            raise ServerError('failed to obtain access token')
+        self.token = token
+        return self.token
+
+    def get_user_info(self, access_token, uid):
+        """
+        Fetch the profile of a user.
+
+        :param access_token: access token sent with the request
+        :param uid: id of the user to look up
+        :return: the decoded JSON payload of ``2/users/show.json``
+        :raises ServerError: when the request did not yield a payload
+        """
+        url = self.API_URL + '2/users/show.json'
+        user_info = self.fetch('GET', url, {
+            'access_token': access_token,
+            'uid': uid
+        })
+        if user_info is None:
+            raise ServerError('failed to fetch user info')
+        return user_info
+
+    def get_conn(self):
+        """
+        Open a new MySQL connection.
+
+        Connection errors propagate to the caller; previously they were
+        swallowed and the method then failed with ``UnboundLocalError``,
+        hiding the real cause.
+        """
+        # NOTE(review): credentials are hard-coded in source; consider moving
+        # them to configuration or the environment.
+        return pymysql.connect(
+            db='test',
+            host='localhost',
+            user='root',
+            password='xxxxMMMM3333#',
+            charset='utf8'
+        )
+
+    def weibo_share(self):
+        """
+        Share a status containing the current time to Weibo.
+
+        :return: the decoded JSON payload, or ``None`` if the request failed
+        """
+        url = self.API_URL + '2/statuses/share.json'
+        resp = self.fetch('POST', url, {
+            # The cached token authenticates the call; requests omits the
+            # field when the value is None (no token cached yet).
+            'access_token': self.access_token,
+            'status': '现在是北京时间： {0} http://test.baidu.com'.format(datetime.now())
+        })
+        return resp
+
+# Weibo application credentials and the shared client used by the routes below.
+# NOTE(review): both values are empty strings here -- presumably placeholders
+# to be filled in with real application credentials; confirm.
+client_id = ''
+client_secret = ''
+client = WeiboClient(client_id, client_secret)
+
+
+# Login: exchange the ``code`` query argument for a token and store the user's profile
+@app.route('/')
+def index():
+    """Fetch the Weibo profile for the given code, insert it into ``user``, return it as JSON."""
+    code = request.args.get('code', 200)
+    # Exchange the code for a token
+    token = client.get_token(code)
+
+    # Fetch the user's profile
+    user_info = client.get_user_info(token['access_token'], token['uid'])
+    third_id = user_info['id']
+    nickname = user_info['screen_name']
+    headimg = user_info['profile_image_url']
+
+    # The values come from a remote service, so they are passed as query
+    # parameters rather than formatted into the SQL text.
+    sql = "INSERT INTO `user`(`third_id`, `nickname`, `headimg`) VALUES(%s, %s, %s)"
+    conn = client.get_conn()
+    try:
+        with conn.cursor() as cursor:
+            cursor.execute(sql, (third_id, nickname, headimg))
+        # Commit explicitly instead of relying on toggling autocommit afterwards
+        conn.commit()
+    finally:
+        # Always release the connection, even if the insert fails
+        conn.close()
+    return json.dumps(user_info)
+
+
+# Send the browser to the Weibo authorize URL
+@app.route('/weibo')
+def weibo():
+    """Redirect to the URL returned by ``client.get_ticket_url()``."""
+    return redirect(client.get_ticket_url())
+
+# Share
+@app.route('/share')
+def share():
+    """Call ``client.weibo_share()`` and return its result serialized as JSON."""
+    return json.dumps(client.weibo_share())
+
+if __name__ == '__main__':
+    # Run Flask's built-in server on port 8010.
+    # NOTE(review): debug=True is intended for local development only
+    app.run(debug=True, port=8010)
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+"""
+@author: XuMing <[email protected]>
+@summary:
+"""
+
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+@author:XuMing（[email protected])
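+@description: Bag-of-words / TF-IDF feature extraction with jieba and a simple cosine-based k-means for text clustering.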
+"""
+
+import math
+
+import jieba
+import jieba.analyse
+import numpy as np
+
+
+def read_words(file_path):
+    """
+    Load a UTF-8 text file into a set with one stripped entry per line.
+    :param file_path: path of the file to read
+    :return: set of the distinct stripped lines
+    """
+    with open(file_path, "r", encoding='utf-8') as handle:
+        return {raw_line.strip() for raw_line in handle}
+
+
+def trim_stopwords(words, stop_words_set):
+    """
+    Drop stop words from a tokenized text, keeping order and duplicates.
+    :param words: iterable of tokens
+    :param stop_words_set: container of tokens to remove
+    :return: list of the tokens that are not stop words
+    """
+    return [token for token in words if token not in stop_words_set]
+
+
+def tfidf(term, doc, word_dict, doc_set):
+    """
+    Return the TF-IDF weight of ``term`` within ``doc``.
+    :param term: token to score
+    :param doc: sequence of tokens
+    :param word_dict: mapping term -> value used as the IDF denominator
+    :param doc_set: collection of all documents (only its length is used)
+    :return: term frequency multiplied by inverse document frequency
+    """
+    occurrences = float(doc.count(term))
+    # The 0.001 offset keeps the division defined for an empty document
+    term_frequency = occurrences / (len(doc) + 0.001)
+    inverse_doc_frequency = math.log(float(len(doc_set)) / word_dict[term])
+    return term_frequency * inverse_doc_frequency
+
+
+def idf(term, word_dict, docset):
+    """
+    Return the inverse document frequency of ``term``.
+    :param term: token to score
+    :param word_dict: mapping term -> value used as the denominator
+    :param docset: collection of all documents (only its length is used)
+    :return: natural log of ``len(docset) / word_dict[term]``
+    """
+    total_docs = float(len(docset))
+    return math.log(total_docs / word_dict[term])
+
+
+def get_all_vector(file_path, stop_words):
+    """
+    Build a TF-IDF matrix from a tab-separated text file.
+
+    Each non-blank line is ``<id>\\t<text...>``. The first column becomes the
+    row name; the remaining columns are joined, lower-cased, stripped of two
+    template placeholders, tokenized with jieba and filtered for stop words.
+
+    :param file_path: path of the UTF-8 input file
+    :param stop_words: container of tokens to drop
+    :return: ``(names, tfidf)`` where ``names`` lists the ids in file order
+        and ``tfidf`` is an array with one row per id and one column per
+        vocabulary word (columns follow the sorted vocabulary, so the layout
+        is the same on every run). An input without any rows yields an
+        empty ``(0, 0)`` array.
+    """
+    # Local import so the block does not depend on a new file-level import.
+    from collections import Counter
+
+    names = []
+    term_counts = []
+    with open(file_path, 'r', encoding='utf-8') as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                # A blank line would otherwise add a nameless all-zero row
+                continue
+            cols = line.split("\t")
+            names.append(cols[0])
+            content = " ".join(cols[1:])
+            content = content.lower().replace("{地域}{投放地域}", "").replace("{关键词}", "")
+            doc = trim_stopwords(jieba.lcut(content), stop_words)
+            # Count once per document instead of calling doc.count per word
+            term_counts.append(Counter(doc))
+
+    vocabulary = sorted(set().union(*term_counts))
+    if not names or not vocabulary:
+        return names, np.zeros((len(names), len(vocabulary)))
+
+    docs_matrix = np.array(
+        [[float(counts[word]) for word in vocabulary] for counts in term_counts]
+    )
+    # Number of documents containing each word; every vocabulary word occurs
+    # in at least one document, so the division below is always defined.
+    doc_freq = np.count_nonzero(docs_matrix, axis=0)
+    idf_weights = np.log(docs_matrix.shape[0] / doc_freq)
+    # Scaling the columns by broadcasting equals multiplying by diag(idf)
+    # without building a dense vocabulary-by-vocabulary matrix.
+    tfidf_matrix = docs_matrix * idf_weights
+
+    return names, tfidf_matrix
+
+
+def gen_sim(A, B):
+    """
+    Return the cosine similarity of two vectors rescaled to ``[0, 1]``.
+
+    :param A: vector given as a 1-D array, a single-row/column 2-D array or matrix
+    :param B: vector of the same length, in any of the same forms
+    :return: ``0.5 + 0.5 * cos(A, B)``; ``0.5`` when either vector is all zeros
+    """
+    # Flatten both inputs so the dot product is a true scalar; converting a
+    # 1x1 (or 1-element) array with float() is deprecated in recent NumPy.
+    a = np.asarray(A, dtype=float).ravel()
+    b = np.asarray(B, dtype=float).ravel()
+    num = float(np.dot(a, b))
+    denum = np.linalg.norm(a) * np.linalg.norm(b)
+    if denum == 0:
+        # Zero vector: the numerator is 0 as well, so any non-zero divisor works
+        denum = 1
+    cosn = num / denum
+    return 0.5 + 0.5 * cosn
+
+
+def rand_center(data_set, k):
+    """
+    Draw ``k`` random centroids inside the per-column bounds of ``data_set``.
+
+    :param data_set: 2-D array-like with one row per data point
+    :param k: number of centroids to create
+    :return: ``k x n`` ``np.matrix`` where column ``j`` is drawn uniformly
+        between the minimum and maximum of column ``j`` of ``data_set``
+    """
+    data = np.asarray(data_set, dtype=float)
+    n = data.shape[1]
+    lows = data.min(axis=0)
+    spans = data.max(axis=0) - lows
+    # rand(n, k).T consumes the random stream in the same order as drawing a
+    # (k, 1) column per dimension, so seeded runs give the same centroids.
+    draws = np.random.rand(n, k).T
+    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0
+    return np.asmatrix(lows + spans * draws)
+
+
+def kmeans(data_set, k, max_iter=50):
+    """
+    Cluster the rows of ``data_set`` into ``k`` groups by cosine distance.
+
+    :param data_set: 2-D array-like with one row per data point
+    :param k: number of clusters
+    :param max_iter: upper bound on assignment/update rounds
+    :return: ``(centroids, clusterAssment)``; ``centroids`` is a ``k x n``
+        matrix and ``clusterAssment`` an ``m x 2`` matrix holding, per row,
+        the cluster index and the squared distance to that cluster's centroid
+    """
+    data = np.asarray(data_set, dtype=float)
+    m = data.shape[0]
+    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0
+    clusterAssment = np.asmatrix(np.zeros((m, 2)))
+    # Start from an impossible cluster index so the first pass counts as a change
+    clusterAssment[:, 0] = -1
+    centroids = rand_center(data, k)
+    for _ in range(max_iter):
+        changed = False
+        for i in range(m):  # assign each data point to the closest centroid
+            minDist = np.inf
+            minIndex = -1
+            for j in range(k):
+                # gen_sim is a similarity (larger = closer); turn it into a
+                # distance so that picking the minimum picks the nearest
+                # centroid rather than the least similar one.
+                distJI = 1.0 - gen_sim(centroids[j, :], data[i, :])
+                if distJI < minDist:
+                    minDist = distJI
+                    minIndex = j
+            if clusterAssment[i, 0] != minIndex:
+                changed = True
+            clusterAssment[i, :] = minIndex, minDist ** 2
+        labels = np.asarray(clusterAssment[:, 0]).ravel()
+        for cent in range(k):  # recalculate centroids
+            ptsInClust = data[labels == cent]
+            # An empty cluster keeps its centroid; the mean of no points is NaN
+            if len(ptsInClust):
+                centroids[cent, :] = np.mean(ptsInClust, axis=0)
+        if not changed:
+            # Assignments are stable, so further rounds would repeat this one
+            break
+    return centroids, clusterAssment
+
+
+if __name__ == "__main__":
+    # Load the stop-word list, vectorize the documents, then cluster them into 8 groups
+    stop_words = read_words("../../data/stopword.txt")
+    names, tfidf_mat = get_all_vector("./yl_10.txt", stop_words)
+    myCentroids, clustAssing = kmeans(tfidf_mat, 8)
+    # Column 0 of the assignment matrix holds each row's cluster index
+    for label, name in zip(clustAssing[:, 0], names):
+        print(label, name)