Obtaining the HTTP status codes from Wikipedia

import re
from typing import Dict, Iterable
from urllib.request import urlopen

STATUS_CODES_URL = 'https://en.wikipedia.org/wiki/List_of_HTTP_status_codes'

# Three digits, a single space, then the description.  DOTALL lets the
# description span newlines embedded in the extracted text node.
_CODE_PATTERN = re.compile(r'(\d{3}) (.*)', re.DOTALL)


def parse_status_codes(texts: Iterable[str]) -> Dict[int, str]:
    """Build a ``{code: description}`` mapping from raw text fragments.

    Each fragment is cut at the first ``' ('`` (dropping any parenthetical
    suffix) and then searched for ``"<3 digits> <description>"``.  Fragments
    that do not contain that pattern are ignored.  When the same code occurs
    more than once, the last occurrence wins.

    Args:
        texts: Text fragments, e.g. ``"404 Not Found"``.

    Returns:
        Mapping of integer status code to its description string.
    """
    codes = {}
    for text in texts:
        match = _CODE_PATTERN.search(text.split(' (')[0])
        if match:
            code, desc = match.groups()
            codes[int(code)] = desc
    return codes


def fetch_status_codes(url: str = STATUS_CODES_URL) -> Dict[int, str]:
    """Download *url* and extract the HTTP status codes listed on the page.

    The page is decoded as UTF-8, parsed with lxml, and the text nodes of
    every ``<dt>`` element (and of ``<a>`` elements directly inside a
    ``<dt>``) are handed to :func:`parse_status_codes`.

    Args:
        url: Page to scrape; defaults to the Wikipedia list of status codes.

    Returns:
        Mapping of integer status code to its description string.
    """
    # Imported here so parse_status_codes() stays usable without lxml.
    from lxml import html

    # The context manager closes the HTTP response even if read() fails.
    with urlopen(url) as response:
        page = response.read().decode('utf-8')

    tree = html.fromstring(page)
    return parse_status_codes(tree.xpath('(//dt | //dt/a)/text()'))


def main() -> None:
    """Fetch the status codes, print them all, then look up a few samples."""
    codes = fetch_status_codes()

    # View all status codes
    print(codes)

    # Check only some
    check = [304, 404, 430, 506]
    print([codes.get(c, 'N/A') for c in check])
    # ['Not Modified', 'Not Found', 'N/A', 'Variant Also Negotiates']


if __name__ == '__main__':
    main()