python/Python_Basic/python_http_request.py at python · LiKevin/python

44 lines (37 loc) · 1.18 KB

__author__ = 'k22li'

import requests

from xml.etree import ElementTree

def requestUrlResources():

"""

request resources from the targe urls

"""

urlToVisit = raw_input('Pls. give your web address to visit: ')

req = requests.request('GET', urlToVisit)

return req.text

def encodingRequestedConents(contents = '', coding = 'gb2312'):

"""

decode the page contents in case there are illegal encodings

"""

if not isinstance(contents, unicode):

print 'non-unicode confirmed'

decodedContents = contents.decode(coding)

else:

print 'unicode confirmed'

decodedContents = contents.encode('utf-8')

return decodedContents

def getChildrenContents(strings = ''):

"""

return the key nodes from the downloaded url resources

"""

strings = strings.rstrip()

tree = ElementTree.fromstring(strings)

root = tree.getroot()

print root.tag

# for node in tree.getchildren():

# print node.tag, node.attrib

if __name__ == '__main__':

urlContent = requestUrlResources()

decodedContent = encodingRequestedConents(urlContent, 'ascii')

print decodedContent

k = getChildrenContents(decodedContent)

Provide feedback