encodeFilename,
escape_rfc3986,
escape_url,
+ ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
InAdvancePagedList,
read_batch_urls,
sanitize_filename,
sanitize_path,
+ sanitize_url_path_consecutive_slashes,
+ prepend_extension,
+ replace_extension,
shell_quote,
smuggle_url,
str_to_int,
urlencode_postdata,
version_tuple,
xpath_with_ns,
+ xpath_text,
render_table,
match_str,
+ parse_dfxp_time_expr,
+ dfxp2srt,
)
self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
+ self.assertEqual(sanitize_path('../abc'), '..\\abc')
+ self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
+ self.assertEqual(sanitize_path('./abc'), 'abc')
+ self.assertEqual(sanitize_path('./../abc'), '..\\abc')
+
def test_sanitize_url_path_consecutive_slashes(self):
    """Check that doubled slashes in a URL path are collapsed to one.

    The '//' after the scheme must survive, and URLs whose path has no
    doubled slash must come back unchanged.
    """
    # (input URL, expected result) pairs, asserted in this order.
    url_cases = (
        ('http://hostname/foo//bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//',
         'http://hostname/'),
        ('http://hostname/foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname/',
         'http://hostname/'),
        ('http://hostname/abc//',
         'http://hostname/abc/'),
    )
    for raw_url, expected_url in url_cases:
        self.assertEqual(
            sanitize_url_path_consecutive_slashes(raw_url), expected_url)
+
def test_prepend_extension(self):
    """Check that prepend_extension inserts an extension before the last one.

    Covers names with and without an extension, dot-files, and the optional
    third argument naming the extension the file is expected to have.
    """
    # (positional arguments, expected file name) pairs, asserted in order.
    name_cases = (
        (('abc.ext', 'temp'), 'abc.temp.ext'),
        (('abc.ext', 'temp', 'ext'), 'abc.temp.ext'),
        # Actual extension differs from the expected one: the new
        # extension is appended instead of inserted.
        (('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp'),
        (('abc', 'temp'), 'abc.temp'),
        (('.abc', 'temp'), '.abc.temp'),
        (('.abc.ext', 'temp'), '.abc.temp.ext'),
    )
    for call_args, expected_name in name_cases:
        self.assertEqual(prepend_extension(*call_args), expected_name)
+
def test_replace_extension(self):
    """Check that replace_extension swaps the last extension for a new one.

    Covers names with and without an extension, dot-files, and the optional
    third argument naming the extension the file is expected to have.
    """
    # (positional arguments, expected file name) pairs, asserted in order.
    name_cases = (
        (('abc.ext', 'temp'), 'abc.temp'),
        (('abc.ext', 'temp', 'ext'), 'abc.temp'),
        # Actual extension differs from the expected one: the new
        # extension is appended and nothing is removed.
        (('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp'),
        (('abc', 'temp'), 'abc.temp'),
        (('.abc', 'temp'), '.abc.temp'),
        (('.abc.ext', 'temp'), '.abc.temp'),
    )
    for call_args, expected_name in name_cases:
        self.assertEqual(replace_extension(*call_args), expected_name)
+
def test_ordered_set(self):
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), [])
def test_unescape_html(self):
    """Check unescapeHTML on plain text and on numeric character references."""
    # No leading '&', so this is not an entity and must come back untouched.
    self.assertEqual(unescapeHTML('%20;'), '%20;')
    # These two checks were byte-identical and passed a bare '/', which never
    # exercises unescaping. Use the hexadecimal and the decimal character
    # reference for U+002F so each numeric form is actually decoded.
    self.assertEqual(unescapeHTML('&#x2F;'), '/')
    self.assertEqual(unescapeHTML('&#47;'), '/')
    self.assertEqual(
        unescapeHTML('é'), 'é')
self.assertEqual(
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
'20150202')
+ self.assertEqual(unified_strdate('25-09-2014'), '20140925')
def test_find_xpath_attr(self):
testxml = '''<root>
self.assertEqual(find('media:song/media:author').text, 'The Author')
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
def test_xpath_text(self):
    """Check xpath_text for a matching path, a missing path, and fatal=True."""
    markup = '''<root>
 <div>
 <p>Foo</p>
 </div>
 </root>'''
    root = xml.etree.ElementTree.fromstring(markup)

    # An existing element yields its text.
    found_text = xpath_text(root, 'div/p')
    self.assertEqual(found_text, 'Foo')

    # A path that matches nothing yields None by default ...
    missing_text = xpath_text(root, 'div/bar')
    self.assertTrue(missing_text is None)

    # ... and raises ExtractorError when fatal=True is passed.
    self.assertRaises(
        ExtractorError, xpath_text, root, 'div/bar', fatal=True)
+
def test_smuggle_url(self):
data = {"ö": "ö", "abc": [3]}
url = 'https://foo.bar/baz?x=y#a'
self.assertEqual(d['x'], 1)
self.assertEqual(d['y'], 'a')
+ on = js_to_json('["abc", "def",]')
+ self.assertEqual(json.loads(on), ['abc', 'def'])
+
+ on = js_to_json('{"abc": "def",}')
+ self.assertEqual(json.loads(on), {'abc': 'def'})
+
def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b')
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 190, 'dislike_count': 10}))
def test_parse_dfxp_time_expr(self):
    """Check parse_dfxp_time_expr against empty, offset and clock-time inputs."""
    # (time expression, expected number of seconds) pairs, asserted in order.
    time_cases = (
        (None, 0.0),
        ('', 0.0),
        ('0.1', 0.1),
        ('0.1s', 0.1),
        ('00:00:01', 1.0),
        ('00:00:01.100', 1.1),
    )
    for time_expr, expected_seconds in time_cases:
        self.assertEqual(parse_dfxp_time_expr(time_expr), expected_seconds)
+
def test_dfxp2srt(self):
    """Convert a three-cue DFXP/TTML document and compare with the expected SRT.

    The cues cover plain ASCII text, non-ASCII text containing a <br/>, and
    a <span> wrapping a <br/>; each <br/> is expected to become a line break.
    """
    # Expected SRT: numbered cues, 'HH:MM:SS,mmm --> HH:MM:SS,mmm' timing
    # lines, and a blank line after every cue.
    expected_srt = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols

2
00:00:01,000 --> 00:00:02,000
第二行
♪♪

3
00:00:02,000 --> 00:00:03,000
Third
Line

'''
    ttml_source = '''<?xml version="1.0" encoding="UTF-8"?>
 <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
 <body>
 <div xml:lang="en">
 <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
 <p begin="1" end="2">第二行<br/>♪♪</p>
 <p begin="2" end="3"><span>Third<br/>Line</span></p>
 </div>
 </body>
 </tt>'''
    converted = dfxp2srt(ttml_source)
    self.assertEqual(converted, expected_srt)
+
# Start the unittest runner only when this file is executed directly,
# not when it is imported.
if __name__ == '__main__':
    unittest.main()