-
Notifications
You must be signed in to change notification settings - Fork 2
/
testcrawlajax.py
executable file
·53 lines (42 loc) · 2.6 KB
/
testcrawlajax.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/python
import unittest
import os
import shutil
from crawlajax import CrawlAjax
class CrawlAjaxTest(unittest.TestCase):
    """Unit tests for ``CrawlAjax``.

    Each test runs against a fresh ``CrawlAjax`` instance whose web folder
    is ``WEB_FOLDER``.  That folder is deleted before and after every test
    so files written by one test cannot influence another.
    """

    # Scratch output folder, relative to the current working directory.
    WEB_FOLDER = "testwww"

    @classmethod
    def _remove_web_folder(cls):
        """Delete the scratch web folder if it currently exists."""
        if os.path.isdir(cls.WEB_FOLDER):
            shutil.rmtree(cls.WEB_FOLDER)

    def _assert_extracted_hrefs(self, html, expected):
        """Assert that ``extractHrefsFromHTML(html)`` yields ``expected``.

        The result is compared by length and by index (not with ``==``) so
        the check does not depend on the concrete sequence type returned.
        """
        extracted = self.instance.extractHrefsFromHTML(html)
        self.assertEqual(len(extracted), len(expected))
        for index, href in enumerate(expected):
            self.assertEqual(extracted[index], href)

    def setUp(self):
        """Create the crawler under test, then start from a clean folder."""
        # NOTE(review): "echo %s" presumably stands in for the real snapshot
        # command so no external tool runs during tests -- confirm against
        # CrawlAjax.
        self.instance = CrawlAjax(webfolder=self.WEB_FOLDER,
                                  snapshot_cmd="echo %s")
        self._remove_web_folder()

    def tearDown(self):
        """Remove anything the test wrote into the scratch folder."""
        # NOTE(review): re-creating the instance here is kept from the
        # original; it looks redundant (setUp always replaces it) unless the
        # constructor has side effects -- confirm and drop if not needed.
        self.instance = CrawlAjax(self.WEB_FOLDER)
        self._remove_web_folder()

    def testSnapshot(self):
        """``snapshot`` returns the expected count for each URL list.

        The cases run in order against the same instance.  The expected
        counts are the contract pinned by this test; the counting rule
        itself lives in ``CrawlAjax``.
        """
        foo = "http://example.com/#!/foo"
        bar = "http://example.com/#!/bar"
        other_host_bar = "http://hede.com/#!/bar"
        cases = [
            (None, 0),
            ([], 0),
            (["http://example.com"], 0),
            ([foo, other_host_bar], 1),
            ([foo, other_host_bar, foo], 1),
            ([foo, other_host_bar, foo, bar], 2),
        ]
        for urls, expected in cases:
            actual = self.instance.snapshot(urls)
            self.assertEqual(
                actual, expected,
                "snapshot(%r) returned %r, expected %r"
                % (urls, actual, expected))

    def testPathFromURL(self):
        """``pathFromURLFragment`` maps a fragment to (folder, file name)."""
        self.assertEqual(
            self.instance.pathFromURLFragment("foo/--/bar/hede"),
            (self.WEB_FOLDER + "/foo/--/bar/hede", "index.html"))
        # A leading slash in the fragment is expected to be kept verbatim,
        # which is why the expected folder contains a double slash.
        self.assertEqual(
            self.instance.pathFromURLFragment("/foo/--/bar/hede"),
            (self.WEB_FOLDER + "//foo/--/bar/hede", "index.html"))

    def testExtractHrefsFromHTML(self):
        """``extractHrefsFromHTML`` returns every href value, in order."""
        # Single anchor, single-quoted attribute.
        self._assert_extracted_hrefs(
            "<a href='#!/foo/--/bar/hede'>hede</a>",
            ["#!/foo/--/bar/hede"])
        # Several anchors on one line, mixing single and double quotes.
        self._assert_extracted_hrefs(
            "<a href='#!/foo/--/bar/hede'>hede</a>"
            "<a href='#!/bar/--/bar/hede'>hede</a>"
            "<a href=\"#!/aaa/--/bar/hede\">hede</a>",
            ["#!/foo/--/bar/hede",
             "#!/bar/--/bar/hede",
             "#!/aaa/--/bar/hede"])
        # Anchors separated by markup and a CRLF line break.
        self._assert_extracted_hrefs(
            "<a href='#!/bar/--/bar/hede'>hede</a><br />\r\n"
            "<a href=\"#!/aaa/--/bar/hede\">hede</a>",
            ["#!/bar/--/bar/hede",
             "#!/aaa/--/bar/hede"])

    def testSaveResponse(self):
        """``saveResponse`` creates the target folder and reports success."""
        target_folder = self.WEB_FOLDER + "/foo/--/bar"
        # setUp removed the scratch folder, so nothing may exist yet.
        self.assertFalse(os.path.isdir(target_folder))
        self.assertTrue(self.instance.saveResponse("/foo/--/bar/hede", "hede"))
        self.assertTrue(os.path.isdir(target_folder))
# Run the whole test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()