We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents dd83a91 + fd5d834 commit 7aa7cddCopy full SHA for 7aa7cdd
1 file changed
llluiop/0009/FindLinks.py
@@ -0,0 +1,37 @@
1
+#!/usr/bin/env python
2
+
3
4
+from HTMLParser import HTMLParser
5
+from re import sub
6
+import urllib2
7
+import sys
8
9
10
class HtmlParserMainText(HTMLParser):
    """HTML parser that collects attribute values containing 'http'.

    Every start tag fed to the parser is inspected; any attribute whose
    value contains the substring 'http' is appended to ``self.text`` with
    a trailing newline, in document order.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # Matching attribute values, each already terminated by '\n' so
        # callers can concatenate the list directly into one-per-line text.
        self.text = []

    def handle_starttag(self, tag, attrs):
        """Record every attribute value of this tag that contains 'http'.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        HTMLParser. The tag name and the attribute names are not used for
        filtering -- only the value is checked.
        """
        for _name, value in attrs:
            # Valueless attributes (e.g. ``<input disabled>``) arrive with
            # a value of None; the truthiness check skips those and empty
            # strings before the substring test.
            if value and 'http' in value:
                self.text.append(value + '\n')
19
20
21
22
23
def GetLinks(url="http://www.cnbeta.com/"):
    """Download *url* and return the 'http'-containing attribute values.

    The page is fetched with ``urllib2.urlopen``, parsed with
    ``HtmlParserMainText``, and the collected values are returned as a
    single string with one value per line (no trailing newline).

    Args:
        url: Address of the page to scan. Defaults to the site the script
            was originally hard-wired to, so existing zero-argument calls
            behave as before.

    Returns:
        The collected values joined into one string; an empty string when
        nothing matched.

    Errors raised while opening or reading the URL are not caught here and
    propagate to the caller.
    """
    # Function-scope import keeps this block self-contained.
    from contextlib import closing

    # closing() guarantees the response object is closed even if read()
    # raises; previously the connection was never closed explicitly.
    with closing(urllib2.urlopen(url)) as response:
        html = response.read()

    parser = HtmlParserMainText()
    parser.feed(html)
    parser.close()

    # Each entry already ends with '\n', so an empty-separator join yields
    # one value per line; strip() removes the final newline.
    return ''.join(parser.text).strip()
32
33
34
if __name__ == "__main__":
    # Script entry point: print every collected link, one per line.
    #
    # reload(sys) makes sys.setdefaultencoding reachable again so the
    # process-wide default encoding can be switched to UTF-8 before the
    # page is fetched and parsed.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    links = GetLinks()
    # A parenthesised single argument prints exactly like the bare
    # Python 2 print statement.
    print(links)
0 commit comments