< Applied Programming < Internet Data

internet.py

"""This program demonstrates webpage, XML, and JSON processing.

Input:
    None

Output:
    XML and JSON sample data.

References:
    https://www.mediawiki.org/wiki/Manual:Parameters_to_Special:Export
    https://en.wikiversity.org/wiki/Python_Programming/Internet_Data
    
"""

import json
import os
import sys
import urllib.parse
import urllib.request
import xml.etree.ElementTree

def get_webpage(url):
    """Gets the text of a given URL.

    The response is closed as soon as its body has been read. The body is
    decoded using the charset declared in the response's Content-Type
    header, falling back to UTF-8 when no charset is declared.

    Args:
        url (string): A web page URL to retrieve.

    Returns:
        text (string): The decoded text of the web page.

    Note:
        On any failure an error message is printed and the process is
        terminated immediately with exit status 1 via os._exit(), so no
        exception propagates to the caller.

    """
    try:
        # The context manager closes the response even if read() or
        # decode() raises.
        with urllib.request.urlopen(url) as response:
            charset = response.headers.get_content_charset() or "utf-8"
            return response.read().decode(charset)
    except Exception as exception:
        # os._exit() skips normal interpreter shutdown, including flushing
        # buffered stdout, so flush explicitly or the message can be lost
        # when output is redirected to a file or pipe.
        print(f"{exception} reading {url}", flush=True)
        os._exit(1)
    

def post_webpage(url, parameters):
    """Posts a webpage to given URL and parameters, returning any text response.

    The parameters are URL-encoded and sent as the request body. The
    response is closed as soon as its body has been read, and the body is
    decoded using the charset declared in the response's Content-Type
    header, falling back to UTF-8 when no charset is declared.

    Args:
        url (string): A web page URL to post.
        parameters (dictionary): name:value pairs to post to the webpage.

    Returns:
        text (string): The decoded text of the web page response.

    Note:
        On any failure an error message is printed and the process is
        terminated immediately with exit status 1 via os._exit(), so no
        exception propagates to the caller.

    """
    try:
        data = urllib.parse.urlencode(parameters).encode()
        request = urllib.request.Request(url, data)
        # The context manager closes the response even if read() or
        # decode() raises.
        with urllib.request.urlopen(request) as response:
            charset = response.headers.get_content_charset() or "utf-8"
            return response.read().decode(charset)
    except Exception as exception:
        # os._exit() skips normal interpreter shutdown, including flushing
        # buffered stdout, so flush explicitly or the message can be lost
        # when output is redirected to a file or pipe.
        print(f"{exception} posting to {url}", flush=True)
        os._exit(1)


def display_url(url, text):
    """Prints a URL, the text retrieved from it, and a trailing blank line.

    Args:
        url (string):  The web page URL the text came from.
        text (string): The text from the URL.

    Returns:
        None.

    """
    # A single call emits the URL line, the text line, and the empty
    # separator line, each terminated by a newline.
    print(url, text, "", sep="\n")


def display_xmltree(text):
    """Prints the tag and text of every element in an XML document.

    Elements are listed in document order, starting with the root, one per
    line, followed by a blank line.

    Args:
        text (string): An XML document to parse and display.

    Returns:
        None.

    """
    root_element = xml.etree.ElementTree.fromstring(text)

    # iter() yields the root element itself first, so there is always at
    # least one row to print.
    rows = [f"{node.tag}:\t\t{node.text}" for node in root_element.iter()]
    print("\n".join(rows))
    print()
    
    
def display_wikistats(text):
    """Prints the timestamp and view count of each item in a JSON statistics page.

    Args:
        text (string): A JSON page retrieved from /wikimedia.org/api/rest_v1/metrics .
            It must hold an "items" list whose entries have "timestamp"
            and "views" keys.

    Returns:
        None.

    """
    for entry in json.loads(text)["items"]:
        stamp = entry["timestamp"]
        view_count = entry["views"]
        print(f"{stamp}:\t\t{view_count}")
    
    
def main():
    """Runs the main program logic.

    Retrieves and displays a sample XML document, posts a request to the
    Wikiversity Special:Export page and displays the response, then
    retrieves and displays daily page-view statistics from the Wikimedia
    REST API.

    Any exception raised along the way is reported with its message and
    the file and line of this function that was executing when the
    exception passed through it.

    """
    try:
        url = "http://www.w3schools.com/xml/note.xml"
        text = get_webpage(url)
        display_url(url, text)
        display_xmltree(text)

        url = "https://en.wikiversity.org/wiki/Special:Export"
        parameters = {"pages": "Applied Programming", "limit": "1"}
        text = post_webpage(url, parameters)
        display_url(url, text)

        url = (
            "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikiversity/all-access/user/"
            "Applied_Programming%2fInternet_Data"
            "/daily/2018030100/2018033100"
        )
        text = get_webpage(url)
        display_url(url, text)
        display_wikistats(text)
    except Exception as exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit are not reported as program
        # errors.
        trace = exception.__traceback__
        print("Unexpected error.")
        print("Error:", exception)
        print("File: ", trace.tb_frame.f_code.co_filename)
        print("Line: ", trace.tb_lineno)


# Run the demonstration only when this file is executed as a script, so that
# importing it as a module does not trigger network requests.
if __name__ == "__main__":
    main()

Try It

Copy and paste the code above into a free online development environment, or use your own Python 3 interpreter or IDE.

See Also

This article is issued from Wikiversity. The text is licensed under Creative Commons - Attribution - Sharealike. Additional terms may apply for the media files.