X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=test%2Ftest_InfoExtractor.py;h=da6cd39b65c4e23a6cd6059631b2a75ed04b437b;hb=067aa17edf5a46a8cbc4d6b90864eddf051fa2bc;hp=6f52e11f7b4b58c388e1a2bde12a64a76c62d107;hpb=30bb6ce1a49f37f11afdca96fa7fd62ec56395c3;p=youtube-dl diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 6f52e11f7..da6cd39b6 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -9,10 +9,30 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, expect_dict, expect_value +from test.helper import FakeYDL, expect_dict, expect_value, http_server_port +from youtube_dl.compat import compat_etree_fromstring, compat_http_server from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError +import threading + + +TEAPOT_RESPONSE_STATUS = 418 +TEAPOT_RESPONSE_BODY = "

418 I'm a teapot

" + + +class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def do_GET(self): + if self.path == '/teapot': + self.send_response(TEAPOT_RESPONSE_STATUS) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) + else: + assert False class TestIE(InfoExtractor): @@ -41,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase): + ''' self.assertEqual(ie._og_search_title(html), 'Foo') self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') @@ -49,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase): self.assertEqual(ie._og_search_property('foobar', html), 'Foo') self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') + self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph') self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) @@ -179,7 +201,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ def test_parse_m3u8_formats(self): _TEST_CASES = [ ( - # https://github.com/rg3/youtube-dl/issues/11507 + # https://github.com/ytdl-org/youtube-dl/issues/11507 # http://pluzz.francetv.fr/videos/le_ministere.html 'pluzz_francetv_11507', 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', @@ -241,7 +263,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }] ), ( - # https://github.com/rg3/youtube-dl/issues/11995 + # https://github.com/ytdl-org/youtube-dl/issues/11995 # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor 'teamcoco_11995', 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', @@ -315,7 +337,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }] ), ( - # https://github.com/rg3/youtube-dl/issues/12211 + # https://github.com/ytdl-org/youtube-dl/issues/12211 # http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601 'toggle_mobile_12211', 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', @@ -477,7 +499,64 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'width': 1280, 'height': 720, }] - ) + ), + ( + # https://github.com/ytdl-org/youtube-dl/issues/18923 + # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa + 'ted_18923', + 'http://hls.ted.com/talks/31241.m3u8', + [{ + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '600k-Audio', + 'vcodec': 'none', + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '68', + 'vcodec': 'none', + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '163', + 'acodec': 'none', + 'width': 320, + 'height': 180, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '481', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '769', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '984', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '1255', + 'acodec': 'none', + 'width': 640, + 'height': 360, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '1693', + 'acodec': 'none', + 'width': 853, + 'height': 480, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '2462', + 'acodec': 'none', + 'width': 1280, + 'height': 720, + }] + ), ] for m3u8_file, m3u8_url, expected_formats in _TEST_CASES: @@ -488,6 +567,279 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) + def test_parse_mpd_formats(self): + _TEST_CASES = [ + ( + # https://github.com/ytdl-org/youtube-dl/issues/13919 + # Also tests duplicate representation ids, see + # https://github.com/ytdl-org/youtube-dl/issues/15111 + 'float_duration', + 'http://unknown/manifest.mpd', + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'm4a', + 'format_id': '318597', + 'format_note': 'DASH audio', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 61.587, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '318597', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.42001f', + 'tbr': 318.597, + 'width': 340, + 'height': 192, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '638590', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.42001f', + 'tbr': 638.59, + 'width': 512, + 'height': 288, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '1022565', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4d001f', + 'tbr': 1022.565, + 'width': 688, + 'height': 384, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '2046506', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4d001f', + 'tbr': 2046.506, + 'width': 1024, + 'height': 576, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '3998017', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640029', + 'tbr': 3998.017, + 'width': 1280, + 'height': 720, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '5997485', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640032', + 'tbr': 5997.485, + 'width': 1920, + 'height': 1080, + }] + ), ( + # https://github.com/ytdl-org/youtube-dl/pull/14844 + 'urls_only', + 'http://unknown/manifest.mpd', + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_144p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 200, + 'width': 256, + 'height': 144, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_240p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 400, + 'width': 424, + 'height': 240, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_360p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 800, + 'width': 640, + 'height': 360, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_480p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 1200, + 'width': 856, + 'height': 480, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_576p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 1600, + 'width': 1024, + 'height': 576, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_720p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 2400, + 'width': 1280, + 'height': 720, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_1080p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 4400, + 'width': 1920, + 'height': 1080, + }] + ) + ] + + for mpd_file, mpd_url, expected_formats in _TEST_CASES: + with io.open('./test/testdata/mpd/%s.mpd' % mpd_file, + mode='r', encoding='utf-8') as f: + formats = self.ie._parse_mpd_formats( + compat_etree_fromstring(f.read().encode('utf-8')), + mpd_url=mpd_url) + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) + + def test_parse_f4m_formats(self): + _TEST_CASES = [ + ( + # https://github.com/ytdl-org/youtube-dl/issues/14660 + 'custom_base_url', + 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', + [{ + 'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', + 'ext': 'flv', + 'format_id': '2148', + 'protocol': 'f4m', + 'tbr': 2148, + 'width': 1280, + 'height': 720, + }] + ), + ] + + for f4m_file, f4m_url, expected_formats in _TEST_CASES: + with io.open('./test/testdata/f4m/%s.f4m' % f4m_file, + mode='r', encoding='utf-8') as f: + formats = self.ie._parse_f4m_formats( + compat_etree_fromstring(f.read().encode('utf-8')), + f4m_url, None) + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) + + def test_parse_xspf(self): + _TEST_CASES = [ + ( + 'foo_xspf', + 'https://example.org/src/foo_xspf.xspf', + [{ + 'id': 'foo_xspf', + 'title': 'Pandemonium', + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 202.416, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/cd1/track%201.mp3', + }], + }, { + 'id': 'foo_xspf', + 'title': 'Final Cartridge (Nichico Twelve Remix)', + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 255.857, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3', + }], + }, { + 'id': 'foo_xspf', + 'title': 'Rebuilding Nightingale', + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 287.915, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/track3.mp3', + }, { + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.com/track3.mp3', + }] + }] + ), + ] + + for xspf_file, xspf_url, expected_entries in _TEST_CASES: + with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, + mode='r', encoding='utf-8') as f: + entries = self.ie._parse_xspf( + compat_etree_fromstring(f.read().encode('utf-8')), + xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) + expect_value(self, entries, expected_entries, None) + for i in range(len(entries)): + expect_dict(self, entries[i], expected_entries[i]) + + def test_response_with_expected_status_returns_content(self): + # Checks for mitigations against the effects of + # that affect Python 3.4.1+, which + # manifest as `_download_webpage`, `_download_xml`, `_download_json`, + # or the underlying `_download_webpage_handle` returning no content + # when a response matches `expected_status`. + + httpd = compat_http_server.HTTPServer( + ('127.0.0.1', 0), InfoExtractorTestRequestHandler) + port = http_server_port(httpd) + server_thread = threading.Thread(target=httpd.serve_forever) + server_thread.daemon = True + server_thread.start() + + (content, urlh) = self.ie._download_webpage_handle( + 'http://127.0.0.1:%d/teapot' % port, None, + expected_status=TEAPOT_RESPONSE_STATUS) + self.assertEqual(content, TEAPOT_RESPONSE_BODY) + if __name__ == '__main__': unittest.main()