From: Philipp Hagemeister Date: Mon, 25 Aug 2014 16:36:42 +0000 (+0200) Subject: Merge branch 'master' of github.com:rg3/youtube-dl X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=f971dcbba0671391c0ab3180d02143db28675c9a;hp=bcc069a937ca51b85e57fee61eff2f45f44816ac;p=youtube-dl Merge branch 'master' of github.com:rg3/youtube-dl --- diff --git a/test/helper.py b/test/helper.py index 22d763860..01b11f661 100644 --- a/test/helper.py +++ b/test/helper.py @@ -102,7 +102,10 @@ def expect_info_dict(self, expected_dict, got_dict): match_rex = re.compile(match_str) self.assertTrue( - isinstance(got, compat_str) and match_rex.match(got), + isinstance(got, compat_str), + 'Expected a %r object, but got %r' % (compat_str, type(got))) + self.assertTrue( + match_rex.match(got), u'field %s (value: %r) should match %r' % (info_field, got, match_str)) elif isinstance(expected, type): got = got_dict.get(info_field) diff --git a/test/test_download.py b/test/test_download.py index c8d4ec2c8..6422ef119 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -7,6 +7,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import ( + assertGreaterEqual, get_params, gettestcases, expect_info_dict, @@ -136,12 +137,18 @@ def generator(test_case): self.assertEqual(res_dict['_type'], 'playlist') expect_info_dict(self, test_case.get('info_dict', {}), res_dict) if 'playlist_mincount' in test_case: - self.assertGreaterEqual( + assertGreaterEqual( + self, len(res_dict['entries']), test_case['playlist_mincount'], 'Expected at least %d in playlist %s, but got only %d' % ( test_case['playlist_mincount'], test_case['url'], len(res_dict['entries']))) + if 'playlist_count' in test_case: + self.assertEqual( + len(res_dict['entries']), + test_case['playlist_count'], + 'Expected at %d in playlist %s, but got %d.') for tc in test_cases: tc_filename = get_tc_filename(tc) diff --git a/test/test_playlists.py b/test/test_playlists.py index 3f79a7d6a..0137b8399 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -310,24 +310,6 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker') self.assertEqual(len(result['entries']), 3) - def test_GoogleSearch(self): - dl = FakeYDL() - ie = GoogleSearchIE(dl) - result = ie.extract('gvsearch15:python language') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'python language') - self.assertEqual(result['title'], 'python language') - self.assertEqual(len(result['entries']), 15) - - def test_generic_rss_feed(self): - dl = FakeYDL() - ie = GenericIE(dl) - result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml') - self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml') - self.assertEqual(result['title'], 'Zero Punctuation') - self.assertTrue(len(result['entries']) > 10) - def test_ted_playlist(self): dl = FakeYDL() ie = TEDIE(dl) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 59cc8a5ff..3a908d01f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -341,6 +341,16 @@ class GenericIE(InfoExtractor): 'uploader': 'www.handjobhub.com', 'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub', } + }, + # RSS feed + { + 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml', + 'info_dict': { + 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', + 'title': 'Zero Punctuation', + 'description': 're:' + }, + 'playlist_mincount': 11, } ] diff --git a/youtube_dl/extractor/googlesearch.py b/youtube_dl/extractor/googlesearch.py index 383032d81..469e1f935 100644 --- a/youtube_dl/extractor/googlesearch.py +++ b/youtube_dl/extractor/googlesearch.py @@ -14,6 +14,14 @@ class GoogleSearchIE(SearchInfoExtractor): _MAX_RESULTS = 1000 IE_NAME = 'video.google:search' _SEARCH_KEY = 'gvsearch' + _TEST = { + 'url': 'gvsearch15:python language', + 'info_dict': { + 'id': 'python language', + 'title': 'python language', + }, + 'playlist_count': 15, + } def _get_n_results(self, query, n): """Get a specified number of results for a query""" diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 53977cd2a..16bc7408a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1458,6 +1458,12 @@ def urlencode_postdata(*args, **kargs): return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii') +try: + etree_iter = xml.etree.ElementTree.Element.iter +except AttributeError: # Python <=2.6 + etree_iter = lambda n: n.findall('.//*') + + def parse_xml(s): class TreeBuilder(xml.etree.ElementTree.TreeBuilder): def doctype(self, name, pubid, system): @@ -1465,7 +1471,14 @@ def parse_xml(s): parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder()) kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {} - return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs) + tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs) + # Fix up XML parser in Python 2.x + if sys.version_info < (3, 0): + for n in etree_iter(tree): + if n.text is not None: + if not isinstance(n.text, compat_str): + n.text = n.text.decode('utf-8') + return tree if sys.version_info < (3, 0) and sys.platform == 'win32':