Skip to content

Commit d902166

Browse files
committed
Merge pull request Show-Me-the-Code#165 from yemaobumei/master
Python 练习册,每天一个小程序
2 parents 349a92a + 8f14088 commit d902166

File tree

18 files changed

+325
-1
lines changed

18 files changed

+325
-1
lines changed

python

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Subproject commit d989ede362d8938f44dbf7358ec935823a32e9cd
1+
Subproject commit 4ae3b1cc45266a9a769960696ead49087cc02cd3

yefan/001/001.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
"""
5+
分析
6+
其实要生成激活码(邀请码)也是很简单的事, 比如随机生成.或者使用GUID,UUID等,非常简单
7+
8+
但是我们得考虑存入以及验证的问题.
9+
10+
这里我参考产生唯一随机码的方法分析。这篇文章的思路:
11+
12+
主键+随机码的方式.
13+
14+
这种方法优点:使用也比较简单,不用直接去查询数据库,而最大的优点是查询的时候,可以根据邀请码直接得到主键id, 然后根据id去数据库查询(速度很快),再比较查询出来的邀请码和用户提交的邀请码是否一致。
15+
16+
生成:id(数据库primary key )->16进制 + "L(标识符)" +随机码
17+
获取id:获取16进制的id再转回10进制
18+
"""
19+
20+
21+
import random
22+
import string
23+
24+
def activation_code(id,length=10):
    """Return an activation code of *length* characters.

    Layout: hex(id) + 'L' marker + random alphanumeric tail.  The 'L'
    separator lets get_id() recover the primary key from the code without
    a database scan (hex digits never contain 'L').
    """
    prefix = hex(int(id))[2:] + 'L'
    # pad the remainder of the code with random letters/digits
    alphabet = string.ascii_letters + string.digits
    tail_len = length - len(prefix)
    tail = ''.join(random.choice(alphabet) for _ in range(tail_len))
    return prefix + tail
33+
34+
def get_id(code):
    """Recover the decimal id (as a string) from its hex representation."""
    value = int(code.upper(), 16)
    return str(value)
37+
38+
if __name__=="__main__":
39+
for i in range(10,500,35):
40+
code = activation_code(i)
41+
id_hex = code.split('L')[0]
42+
id = get_id(id_hex)
43+
print code,id

yefan/004/004.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
"""
5+
python实现任一个英文的纯文本文件,统计其中的单词出现的个数、行数、字符数
6+
"""
7+
8+
file_name = "movie.txt"
9+
10+
line_counts = 0
11+
word_counts = 0
12+
character_counts = 0
13+
14+
with open('C:\Python27\oneday_one\movie.txt', 'r') as f:
15+
for line in f:
16+
words = line.split()
17+
18+
line_counts += 1
19+
word_counts += len(words)
20+
character_counts += len(line)
21+
22+
print "line_counts ", line_counts
23+
print "word_counts ", word_counts
24+
print "character_counts ", character_counts

yefan/007/007.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# -*- coding: utf-8 -*-
2+
3+
4+
#list all the files in your path(完整路径名path\**.py)
5+
import os
6+
def get_files(path):
    """Recursively collect the paths of all .py files under *path*.

    Fixed: the original joined paths with a hard-coded '\\', which is
    Windows-only; os.path.join is portable.  The extension test now uses
    endswith('.py') instead of split('.')[-1].
    """
    files_path = []
    for name in os.listdir(path):
        full = os.path.join(path, name)
        if os.path.isfile(full):
            if full.endswith('.py'):
                files_path.append(full)
        elif os.path.isdir(full):
            # recurse into sub-directories and merge their results
            files_path += get_files(full)
    return files_path
17+
18+
# Count lines and blank lines and note lines in designated files
19+
def count_lines(files):
    """Count total, blank and comment lines across *files*.

    Returns a (line, blank, note) tuple; a "note" line is one whose first
    non-whitespace character is '#' or '/'.

    Fixed: files are now opened with a context manager (the original
    leaked the handle if iteration raised), in text mode instead of 'rb'
    (bytes vs str comparison silently fails on Python 3), and the empty /
    comment checks use truthiness and startswith with a tuple.
    """
    line, blank, note = 0, 0, 0
    for filename in files:
        with open(filename, 'r') as f:
            for raw in f:
                stripped = raw.strip()
                line += 1
                if not stripped:
                    blank += 1
                elif stripped.startswith(('#', '/')):
                    note += 1
    return (line, blank, note)
32+
33+
if __name__ == '__main__':
34+
a=r'c:\python27'
35+
#files = get_files(r'c:\python27\oneday_one')
36+
files = get_files(r'F\v6:')
37+
print len(files),files
38+
lines = count_lines(files)
39+
print 'Line(s): %d, black line(s): %d, note line(s): %d' % (lines[0], lines[1], lines[2])
40+

yefan/008/008.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/usr/bin/python
2+
#coding=utf-8
3+
4+
"""
5+
第 0008 题:一个HTML文件,找出里面的正文。
6+
"""
7+
8+
from bs4 import BeautifulSoup
9+
10+
def find_the_content(path):
    """Extract the visible text of an HTML file, returned gbk-encoded.

    Characters that cannot be represented in gbk are silently dropped
    ('ignore' error handler).
    """
    with open(path) as html_file:
        soup = BeautifulSoup(html_file, 'lxml')
    text = soup.get_text().strip('\n')
    return text.encode('gbk', 'ignore')
16+
17+
18+
if __name__ == '__main__':
19+
print find_the_content(r'D:\Show-Me-the-Code_show-me-the-code_1.html')

yefan/009/009.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/usr/bin/python
2+
#coding=utf-8
3+
4+
"""
5+
第 0009 题:一个HTML文件,找出里面的链接
6+
"""
7+
8+
from bs4 import BeautifulSoup
9+
10+
def find_the_link(filepath):
    """Return the href attribute of every <a> tag in the given HTML file."""
    with open(filepath) as f:
        soup = BeautifulSoup(f, 'lxml')
    return [anchor['href'] for anchor in soup.find_all('a')]
17+
18+
if __name__ == '__main__':
19+
#print find_the_link('D:\Show-Me-the-Code_show-me-the-code_1.html')
20+

yefan/011/011.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/bin/env python
2+
# -*- coding: utf-8 -*-
3+
import codecs
4+
def read_txt(path=r'c:\python27\oneday_one\1.txt'):
    """Read the word-list file and return its lines with whitespace stripped.

    Generalized: *path* is now a parameter; its default is the original
    hard-coded location, so existing read_txt() callers are unaffected.
    """
    words = []
    with codecs.open(path) as fp:
        # iterate the file directly instead of materializing readlines()
        for line in fp:
            words.append(line.strip())
    return words
10+
11+
def check(l):
12+
word=raw_input('word:')
13+
for each_word in l:
14+
if word==each_word:
15+
print 'Freedom'
16+
return None
17+
print 'Human rights'
18+
return None
19+
20+
def main():
21+
l=read_txt()
22+
check(l)
23+
print l
24+
25+
if __name__=='__main__':
26+
main()
27+
28+

yefan/013/013.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/usr/bin/python
2+
# coding=utf-8
3+
4+
"""
5+
第 0013 题: 用 Python 写一个爬图片的程序,爬 这个链接里的日本妹子图片 :-)
6+
"""
7+
8+
import os
9+
import urllib
10+
from bs4 import BeautifulSoup
11+
from urlparse import urlsplit
12+
import re
13+
14+
def catch_tieba_pics(url):
    """Download every user-posted image found in a Tieba thread page.

    Fixed: removed the leftover debug output (print type(...) twice and a
    full bs.prettify() dump of the page) and the commented-out write.
    """
    content = urllib.urlopen(url)
    soup = BeautifulSoup(content, 'lxml')
    # Tieba marks user-posted images with the BDE_Image class
    for img in soup.find_all('img', {"class": "BDE_Image"}):
        download_pic(img['src'])
23+
24+
def download_pic(url):
    """Fetch one image and save it under its URL basename in the cwd.

    Fixed: the output file is now opened with a context manager, so the
    handle is closed even if the write raises (the original only closed
    it on the success path).
    """
    image_content = urllib.urlopen(url).read()
    # urlsplit(url)[2] is the path component of the URL
    file_name = os.path.basename(urlsplit(url)[2])
    with open(file_name, 'wb') as output:
        output.write(image_content)
30+
31+
32+
if __name__ == '__main__':
    # Other URLs tried during development: tieba post 2166231880 and a
    # zhihu question page.
    catch_tieba_pics('http://tieba.baidu.com/p/4203526008')

# Author's open question (translated): why does the zhihu page print only
# a tiny amount of content?  (Presumably its body is rendered client-side
# by JavaScript, so the raw HTML has little text -- TODO confirm.)

yefan/014/014.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# coding = utf-8
2+
__author__ = 'Forec'
3+
import xlwt
4+
import re
5+
6+
# Workbook that receives one row per student record.
book = xlwt.Workbook(encoding = 'utf-8', style_compression=0)
sheet = book.add_sheet('student',cell_overwrite_ok = True)

# Matches records shaped like "1":["name",90,80,70]
info = re.compile(r'\"(\d+)\":\[\"(.*?)\",(\d+),(\d+),(\d+)\]')

with open('student.txt',"r") as f:
    data = f.read()
    # the source file is gbk-encoded; re-encode to utf-8 before matching
    data = data.decode('gbk').encode('utf-8')
    row = 0
    for record in info.findall(data):
        # one captured group per cell: id, name, three scores
        for col, value in enumerate(record):
            sheet.write(row, col, value)
        row += 1

book.save('student.xls')

yefan/014/14.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
#导入模块
5+
import simplejson as json
6+
import xlwt
7+
8+
#从文件(JSON形式)中读取数据返回字典
9+
def read_file(filename):
    """Parse *filename* (a gbk-encoded JSON file) and return the dict.

    Fixed: the original ignored the *filename* argument and always opened
    a hard-coded absolute path.  io.open decodes gbk in one step instead
    of the manual decode('gbk').encode('utf-8') round-trip.
    """
    import io  # local import keeps this fix self-contained
    with io.open(filename, 'r', encoding='gbk') as fp:
        return json.loads(fp.read())
17+
18+
#生成对应的xls文件
19+
def gen_xls(d,filename):
20+
fp = xlwt.Workbook()
21+
table = fp.add_sheet('student',cell_overwrite_ok=True)
22+
#试了下,与很多要转utf-8(ASCII码)存文件的情况不同,xls不接受ASCII码形式的存储,直接用字典里面的Unicode就行了,简直好评,不用在特意decode或者encode了
23+
#想写得更加自动化一些,好复用.本身不太想用两层循环的,不过也不知道有没有更便捷的存储方式(比如整行自动匹配导入,算法是背后优化封装好的,就用了万能的这种方法)
24+
for n in range(len(d)):
25+
table.write(n,0,n+1)
26+
m = 0
27+
for record in d[str(n+1)]:
28+
table.write(n,m+1,record)
29+
m += 1
30+
fp.save('student.xls')
31+
print u'写入完毕'
32+
33+
#主函数,嘛,最后还是用“丑陋的二重循环”实现了,但是其实也没什么,还是要看场景和优化,毕竟这也不是做查找或者排序,在日常使用中也不用太担心性能问题
34+
def main():
    """Convert student.txt (gbk-encoded JSON) into student.xls."""
    src = 'student.txt'
    dst = 'student.xls'
    records = read_file(src)
    gen_xls(records, dst)

if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)