Skip to content

weapp/foxyhtml

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

7 Commits
 
 
 
 
 
 
 
 

Repository files navigation

FoxyHTML

A HTML Parser written in Python

Example

import urllib2
from pprint import pprint
html = urllib2.urlopen("http://www.thetimes.co.uk").read()
parsed = FoxyHtml(html)

print
print "Example1"
print "Way 1:"
results = parsed.search(cls="nib-inner")
for result in results:
    pprint(
        [result.joinedtexts() for result in result.search(tagname="li")])

print
print "Way 2:"
pprint(
    [result.joinedtexts() for result in parsed.foxycss(".nib-inner li")])

print
print "Example2"
pprint([dict(zip(["title", "body"], fields))
        for bricks in parsed.search(cls="brick-mpu")
        for brick in bricks.search(cls="brick-sixth")
        for fields in [brick.search(cls="f-hc").joinedtexts()]
        if all(fields)
        ])

print
print "Example3"
pprint([{
        'title': titles[0],
        'subtitile': titles[1],
        'body': brick.search(tagname="p").joinedtexts()
        }
        for bricks in parsed.search(cls="brick-arts")
        for brick in bricks.search(cls="brick-sixth")
        for titles in brick.search(cls="f-hc").texts()
        ])

print
print "Example4"
bricks = parsed.search(cls="brick-arts")
pprint(bricks.search(tagname="img").attr("src"))
print

print
print "Example5"
brick = bricks.search(cls="brick-sixth")[0]

print brick
print

print brick.rebuild()
print

print brick.clean().rebuild()
print

About

A HTML Parser written in Python

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages