< Applied Programming < Internet Data
internet.py
"""This program demonstrates webpage, XML, and JSON processing.
Input:
None
Output:
XML and JSON sample data.
References:
https://www.mediawiki.org/wiki/Manual:Parameters_to_Special:Export
https://en.wikiversity.org/wiki/Python_Programming/Internet_Data
"""
import json
import os
import sys
import urllib.parse
import urllib.request
import xml.etree.ElementTree
def get_webpage(url):
    """Gets the text of a given URL.

    Args:
        url (string): A web page URL to retrieve.

    Returns:
        text (string): The text of the web page, decoded using the charset
            declared in the response's Content-Type header, or UTF-8 when
            the response declares none.

    Exits:
        Prints the error and terminates the process with status 1 if the
        page cannot be retrieved or decoded.
    """
    try:
        # The context manager closes the response even if read() or
        # decode() raises.
        with urllib.request.urlopen(url) as response:
            charset = response.headers.get_content_charset() or "utf-8"
            return response.read().decode(charset)
    except Exception as exception:
        # os._exit() ends the process without flushing stdio buffers, so
        # flush explicitly or this message (and any earlier buffered
        # output) is lost when stdout is not a terminal.
        print(f"{exception} reading {url}", flush=True)
        os._exit(1)
def post_webpage(url, parameters):
    """Posts a webpage to given URL and parameters, returning any text response.

    Args:
        url (string): A web page URL to post.
        parameters (dictionary): name:value pairs to post to the webpage.

    Returns:
        text (string): The text of the web page response, decoded using the
            charset declared in the response's Content-Type header, or
            UTF-8 when the response declares none.

    Exits:
        Prints the error and terminates the process with status 1 if the
        request fails or the response cannot be decoded.
    """
    try:
        # URL-encode the parameters as a form body; supplying data makes
        # urllib issue a POST instead of a GET for HTTP(S) URLs.
        data = urllib.parse.urlencode(parameters).encode()
        request = urllib.request.Request(url, data)
        # The context manager closes the response even if read() or
        # decode() raises.
        with urllib.request.urlopen(request) as response:
            charset = response.headers.get_content_charset() or "utf-8"
            return response.read().decode(charset)
    except Exception as exception:
        # os._exit() ends the process without flushing stdio buffers, so
        # flush explicitly or this message (and any earlier buffered
        # output) is lost when stdout is not a terminal.
        print(f"{str(exception)} posting to {url}", flush=True)
        os._exit(1)
def display_url(url, text):
    """Prints a URL, the text retrieved from it, and a blank separator line.

    Args:
        url (string): The web page URL the text came from.
        text (string): The text to show beneath the URL.

    Returns:
        None.
    """
    # The empty final field plus print's own newline yields the trailing
    # blank line.
    print(url, text, "", sep="\n")
def display_xmltree(text):
    """Prints the tag and text of every element in an XML document.

    Each element is printed on its own line as the tag, a colon, two tabs,
    and the element text; a blank line follows the last element.

    Args:
        text (string): The XML document to parse and display.

    Returns:
        None.
    """
    root = xml.etree.ElementTree.fromstring(text)
    # Element.iter() yields the root first, then every descendant in
    # document order.
    for node in root.iter():
        tag, content = node.tag, node.text
        print(f"{tag}:\t\t{content}")
    print()
def display_wikistats(text):
    """Prints the timestamp and view count of each item in a JSON statistics page.

    Args:
        text (string): A JSON document with a top-level "items" list whose
            entries each carry "timestamp" and "views" values, such as a
            page retrieved from /wikimedia.org/api/rest_v1/metrics .

    Returns:
        None.
    """
    for entry in json.loads(text)["items"]:
        timestamp = entry["timestamp"]
        views = entry["views"]
        print(f"{timestamp}:\t\t{views}")
def main():
    """Runs the main program logic.

    Retrieves and displays a sample XML document, a Wikiversity page export
    requested by POST, and Wikimedia page view statistics in JSON format.
    Any exception raised along the way is reported with its message, file,
    and line number instead of a traceback.
    """
    try:
        url = "http://www.w3schools.com/xml/note.xml"
        text = get_webpage(url)
        display_url(url, text)
        display_xmltree(text)

        url = "https://en.wikiversity.org/wiki/Special:Export"
        parameters = {"pages": "Applied Programming", "limit": "1"}
        text = post_webpage(url, parameters)
        display_url(url, text)

        url = (
            "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/"
            "en.wikiversity/all-access/user/"
            "Applied_Programming%2fInternet_Data"
            "/daily/2018030100/2018033100"
        )
        text = get_webpage(url)
        display_url(url, text)
        display_wikistats(text)
    except Exception as exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit still propagate normally.
        trace = exception.__traceback__
        print("Unexpected error.")
        print("Error:", exception)
        print("File: ", trace.tb_frame.f_code.co_filename)
        print("Line: ", trace.tb_lineno)
# Run the demonstration only when executed as a script, so that importing
# this module does not trigger network requests.
if __name__ == "__main__":
    main()
Try It
Copy and paste the code above into one of the following free online development environments, or use your own Python 3 interpreter or IDE.
See Also
This article is issued from Wikiversity. The text is licensed under Creative Commons - Attribution - Sharealike. Additional terms may apply for the media files.