Skip to content

Commit 7aa7cdd

Browse files
committed
Merge pull request #118 from llluiop/master
add 0009
2 parents dd83a91 + fd5d834 commit 7aa7cdd

1 file changed

Lines changed: 37 additions & 0 deletions

File tree

llluiop/0009/FindLinks.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/env python
2+
3+
4+
from HTMLParser import HTMLParser
5+
from re import sub
6+
import urllib2
7+
import sys
8+
9+
10+
class HtmlParserMainText(HTMLParser):
    """HTML parser that records attribute values containing ``'http'``.

    Matching values are accumulated in ``self.text`` in the order the
    parser reports them, each one terminated by a newline character.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # Collected attribute values; every entry ends with '\n'.
        self.text = []

    def handle_starttag(self, tag, attrs):
        """Collect each attribute value of a start tag that mentions 'http'.

        ``tag`` is not inspected; only the ``(name, value)`` pairs in
        ``attrs`` are examined.
        """
        self.text.extend(
            attr_value + '\n'
            for _, attr_value in attrs
            # Skip empty or missing values before the substring test.
            if attr_value and 'http' in attr_value
        )
19+
20+
21+
22+
23+
def GetLinks(url="http://www.cnbeta.com/"):
    """Download a page and return the 'http'-bearing attribute values in it.

    Args:
        url: Address of the page to fetch. Defaults to the cnBeta front
            page, so existing zero-argument calls behave as before.

    Returns:
        The entries collected by ``HtmlParserMainText`` joined into one
        string, with leading and trailing whitespace stripped.

    Errors raised by ``urllib2.urlopen`` or while reading the response are
    not caught here and propagate to the caller.
    """
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # Always release the connection, even when the read fails.
        response.close()

    parser = HtmlParserMainText()
    parser.feed(html)
    parser.close()

    return ''.join(parser.text).strip()
32+
33+
34+
if __name__ == "__main__":
    # Python 2 idiom: reload sys so setdefaultencoding can be called, then
    # make UTF-8 the process-wide default encoding before fetching.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    links = GetLinks()
    print(links)

0 commit comments

Comments
 (0)