From: Sergey M. Date: Thu, 2 Oct 2014 12:38:29 +0000 (+0700) Subject: Merge pull request #3865 from diffycat/jpopsuki X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=852f8641e8fb910160b809bb05f7c2533882195d;hp=d5feab9aaa9102f4dbc24c39f504f064c2888746;p=youtube-dl Merge pull request #3865 from diffycat/jpopsuki [jpopsuki] Support category links --- diff --git a/README.md b/README.md index 0f7442906..cabc5eb9a 100644 --- a/README.md +++ b/README.md @@ -348,21 +348,34 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231 # FAQ -### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists +### How do I update youtube-dl? -YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos. +If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). + +If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update. -If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to report bugs to the Ubuntu packaging guys - all they have to do is update the package to a somewhat recent version. +If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. 
You can (and should) complain to your distribution in their bugtracker or support forum. -Alternatively, uninstall the youtube-dl package and follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html). In a pinch, this should do if you used `apt-get` before to install youtube-dl: +As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like + + sudo apt-get remove -y youtube-dl + +Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html): ``` -sudo apt-get remove -y youtube-dl sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+x /usr/local/bin/youtube-dl hash -r ``` +Again, from then on you'll be able to update with `sudo youtube-dl -U`. + +### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists + +YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos. + +If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to report bugs to the Ubuntu packaging guys - all they have to do is update the package to a somewhat recent version. See above for a way to update. + ### Do I always have to pass in `--max-quality FORMAT`, or `-citw`? By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. 
In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`. diff --git a/test/helper.py b/test/helper.py index 7f3ab8438..62cb3ce02 100644 --- a/test/helper.py +++ b/test/helper.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import errno import io import hashlib @@ -12,6 +14,7 @@ from youtube_dl import YoutubeDL from youtube_dl.utils import ( compat_str, preferredencoding, + write_string, ) @@ -40,10 +43,10 @@ def report_warning(message): If stderr is a tty file the 'WARNING:' will be colored ''' if sys.stderr.isatty() and os.name != 'nt': - _msg_header = u'\033[0;33mWARNING:\033[0m' + _msg_header = '\033[0;33mWARNING:\033[0m' else: - _msg_header = u'WARNING:' - output = u'%s %s\n' % (_msg_header, message) + _msg_header = 'WARNING:' + output = '%s %s\n' % (_msg_header, message) if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3: output = output.encode(preferredencoding()) sys.stderr.write(output) @@ -103,22 +106,22 @@ def expect_info_dict(self, expected_dict, got_dict): self.assertTrue( isinstance(got, compat_str), - u'Expected a %s object, but got %s for field %s' % ( + 'Expected a %s object, but got %s for field %s' % ( compat_str.__name__, type(got).__name__, info_field)) self.assertTrue( match_rex.match(got), - u'field %s (value: %r) should match %r' % (info_field, got, match_str)) + 'field %s (value: %r) should match %r' % (info_field, got, match_str)) elif isinstance(expected, type): got = got_dict.get(info_field) self.assertTrue(isinstance(got, expected), - u'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got))) + 'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got))) else: if isinstance(expected, compat_str) and expected.startswith('md5:'): got = 'md5:' + md5(got_dict.get(info_field)) else: got = 
got_dict.get(info_field) self.assertEqual(expected, got, - u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) + 'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) # Check for the presence of mandatory fields if got_dict.get('_type') != 'playlist': @@ -126,7 +129,7 @@ def expect_info_dict(self, expected_dict, got_dict): self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) # Check for mandatory fields that are automatically set by YoutubeDL for key in ['webpage_url', 'extractor', 'extractor_key']: - self.assertTrue(got_dict.get(key), u'Missing field: %s' % key) + self.assertTrue(got_dict.get(key), 'Missing field: %s' % key) # Are checkable fields missing from the test case definition? test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) @@ -134,7 +137,15 @@ def expect_info_dict(self, expected_dict, got_dict): if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) if missing_keys: - sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') + def _repr(v): + if isinstance(v, compat_str): + return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'") + else: + return repr(v) + info_dict_str = ''.join( + ' %s: %s,\n' % (_repr(k), _repr(v)) + for k, v in test_info_dict.items()) + write_string('\n"info_dict": {' + info_dict_str + '}\n', out=sys.stderr) self.assertFalse( missing_keys, 'Missing keys in test definition: %s' % ( diff --git a/test/test_utils.py b/test/test_utils.py index 3efbed29d..bcca0efea 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -22,7 +22,8 @@ from youtube_dl.utils import ( fix_xml_ampersands, get_meta_content, orderedSet, - PagedList, + OnDemandPagedList, + InAdvancePagedList, parse_duration, read_batch_urls, sanitize_filename, 
@@ -43,6 +44,7 @@ from youtube_dl.utils import ( limit_length, escape_rfc3986, escape_url, + js_to_json, ) @@ -137,6 +139,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') self.assertEqual(unified_strdate('1968-12-10'), '19681210') + self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128') def test_find_xpath_attr(self): testxml = ''' @@ -246,10 +249,14 @@ class TestUtil(unittest.TestCase): for i in range(firstid, upto): yield i - pl = PagedList(get_page, pagesize) + pl = OnDemandPagedList(get_page, pagesize) got = pl.getslice(*sliceargs) self.assertEqual(got, expected) + iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize) + got = iapl.getslice(*sliceargs) + self.assertEqual(got, expected) + testPL(5, 2, (), [0, 1, 2, 3, 4]) testPL(5, 2, (1,), [1, 2, 3, 4]) testPL(5, 2, (2,), [2, 3, 4]) @@ -325,5 +332,28 @@ class TestUtil(unittest.TestCase): ) self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') + def test_js_to_json_realworld(self): + inp = '''{ + 'clip':{'provider':'pseudo'} + }''' + self.assertEqual(js_to_json(inp), '''{ + "clip":{"provider":"pseudo"} + }''') + json.loads(js_to_json(inp)) + + inp = '''{ + 'playlist':[{'controls':{'all':null}}] + }''' + self.assertEqual(js_to_json(inp), '''{ + "playlist":[{"controls":{"all":null}}] + }''') + + def test_js_to_json_edgecases(self): + on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") + self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) + + on = js_to_json('{"abc": true}') + self.assertEqual(json.loads(on), {'abc': True}) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 604e76ab6..df2cb09f2 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -47,18 +47,6 @@ _TESTS = [ 
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', 'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', ), - ( - 'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf', - 'swf', - 86, - 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?' - ), - ( - 'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf', - 'swf', - 'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9', - '9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F' - ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', 'js', diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 86bff185b..e51ea701f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -190,6 +190,7 @@ from .livestream import ( LivestreamOriginalIE, LivestreamShortenerIE, ) +from .lrt import LRTIE from .lynda import ( LyndaIE, LyndaCourseIE @@ -354,6 +355,7 @@ from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE +from .tapely import TapelyIE from .teachertube import ( TeacherTubeIE, TeacherTubeUserIE, @@ -371,7 +373,10 @@ from .thisav import ThisAVIE from .tinypic import TinyPicIE from .tlc import TlcIE, TlcDeIE from .tnaflix import TNAFlixIE -from .thvideo import THVideoIE +from .thvideo import ( + THVideoIE, + THVideoPlaylistIE +) from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 957d35979..c3d02f85e 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -86,11 +86,15 @@ class ArteTVPlus7IE(InfoExtractor): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] + upload_date_str = 
player_info.get('shootingDate') + if not upload_date_str: + upload_date_str = player_info.get('VDA', '').split(' ')[0] + info_dict = { 'id': player_info['VID'], 'title': player_info['VTI'], 'description': player_info.get('VDE'), - 'upload_date': unified_strdate(player_info.get('VDA', '').split(' ')[0]), + 'upload_date': unified_strdate(upload_date_str), 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), } diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 4e2960c62..2e277c8c3 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -26,6 +26,8 @@ class BRIE(InfoExtractor): 'title': 'Wenn das Traditions-Theater wackelt', 'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt', 'duration': 34, + 'uploader': 'BR', + 'upload_date': '20140802', } }, { @@ -66,8 +68,7 @@ class BRIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') + display_id = self._match_id(url) page = self._download_webpage(url, display_id) xml_url = self._search_regex( r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL') diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index 65c12136a..d4227e6eb 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -35,7 +35,6 @@ class CliphunterIE(InfoExtractor): 'title': 'Fun Jynx Maze solo', 'thumbnail': 're:^https?://.*\.jpg$', 'age_limit': 18, - 'duration': 1317, } } @@ -86,14 +85,11 @@ class CliphunterIE(InfoExtractor): thumbnail = self._search_regex( r"var\s+mov_thumb\s*=\s*'([^']+)';", webpage, 'thumbnail', fatal=False) - duration = int_or_none(self._search_regex( - r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False)) return { 'id': video_id, 'title': video_title, 'formats': formats, - 'duration': 
duration, 'age_limit': self._rta_search(webpage), 'thumbnail': thumbnail, } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f43a0a569..611cf95f1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -334,7 +334,11 @@ class InfoExtractor(object): try: return json.loads(json_string) except ValueError as ve: - raise ExtractorError('Failed to download JSON', cause=ve) + errmsg = '%s: Failed to parse JSON ' % video_id + if fatal: + raise ExtractorError(errmsg, cause=ve) + else: + self.report_warning(errmsg + str(ve)) def report_warning(self, msg, video_id=None): idstr = '' if video_id is None else '%s: ' % video_id diff --git a/youtube_dl/extractor/dropbox.py b/youtube_dl/extractor/dropbox.py index 817a9bd61..5f24ac721 100644 --- a/youtube_dl/extractor/dropbox.py +++ b/youtube_dl/extractor/dropbox.py @@ -29,9 +29,8 @@ class DropboxIE(InfoExtractor): video_id = mobj.group('id') fn = compat_urllib_parse_unquote(url_basename(url)) title = os.path.splitext(fn)[0] - video_url = ( - re.sub(r'[?&]dl=0', '', url) + - ('?' if '?' in url else '&') + 'dl=1') + video_url = re.sub(r'[?&]dl=0', '', url) + video_url += ('?' if '?' 
not in video_url else '&') + 'dl=1' return { 'id': video_id, diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 522aa3d63..bb231ecb1 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -14,11 +14,11 @@ class EpornerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P\d+)/(?P[\w-]+)' _TEST = { 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', - 'md5': '3b427ae4b9d60619106de3185c2987cd', + 'md5': '39d486f046212d8e1b911c52ab4691f8', 'info_dict': { 'id': '95008', 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Infamous Tiffany Teen Strip Tease Video', 'duration': 194, 'view_count': int, diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 60e68d98a..3ad993751 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -35,7 +35,7 @@ class FacebookIE(InfoExtractor): 'id': '637842556329505', 'ext': 'mp4', 'duration': 38, - 'title': 'Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam fin...', + 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam', } }, { 'note': 'Video without discernible title', diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 721e5fce0..d966e8403 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -21,7 +21,7 @@ class FunnyOrDieIE(InfoExtractor): }, }, { 'url': 'http://www.funnyordie.com/embed/e402820827', - 'md5': 'ff4d83318f89776ed0250634cfaa8d36', + 'md5': '29f4c5e5a61ca39dfd7e8348a75d0aad', 'info_dict': { 'id': 'e402820827', 'ext': 'mp4', diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0dfa4853d..14c024e48 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -155,7 +155,6 @@ class 
GenericIE(InfoExtractor): # funnyordie embed { 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns', - 'md5': '7cf780be104d40fea7bae52eed4a470e', 'info_dict': { 'id': '18e820ec3f', 'ext': 'mp4', @@ -180,13 +179,13 @@ class GenericIE(InfoExtractor): # Embedded TED video { 'url': 'http://en.support.wordpress.com/videos/ted-talks/', - 'md5': 'deeeabcc1085eb2ba205474e7235a3d5', + 'md5': '65fdff94098e4a607385a60c5177c638', 'info_dict': { - 'id': '981', + 'id': '1969', 'ext': 'mp4', - 'title': 'My web playroom', - 'uploader': 'Ze Frank', - 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b', + 'title': 'Hidden miracles of the natural world', + 'uploader': 'Louie Schwartzberg', + 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9', } }, # Embeded Ustream video @@ -226,21 +225,6 @@ class GenericIE(InfoExtractor): 'skip_download': 'Requires rtmpdump' } }, - # smotri embed - { - 'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml', - 'md5': 'ec40048448e9284c9a1de77bb188108b', - 'info_dict': { - 'id': 'v27008541fad', - 'ext': 'mp4', - 'title': 'Крым и Севастополь вошли в состав России', - 'description': 'md5:fae01b61f68984c7bd2fa741e11c3175', - 'duration': 900, - 'upload_date': '20140318', - 'uploader': 'rbctv_2012_4', - 'uploader_id': 'rbctv_2012_4', - }, - }, # Condé Nast embed { 'url': 'http://www.wired.com/2014/04/honda-asimo/', @@ -295,13 +279,13 @@ class GenericIE(InfoExtractor): { 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM', 'info_dict': { - 'id': 'jpSGZsgga_I', + 'id': '4vAffPZIT44', 'ext': 'mp4', - 'title': 'Asphalt 8: Airborne - Launch Trailer', + 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!', 'uploader': 'Gameloft', 'uploader_id': 'gameloft', - 'upload_date': '20130821', - 'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a', + 'upload_date': '20140828', + 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4', }, 'params': { 'skip_download': True, 
diff --git a/youtube_dl/extractor/godtube.py b/youtube_dl/extractor/godtube.py index 73bd6d890..363dc6608 100644 --- a/youtube_dl/extractor/godtube.py +++ b/youtube_dl/extractor/godtube.py @@ -36,16 +36,16 @@ class GodTubeIE(InfoExtractor): 'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(), video_id, 'Downloading player config XML') - video_url = config.find('.//file').text - uploader = config.find('.//author').text - timestamp = parse_iso8601(config.find('.//date').text) - duration = parse_duration(config.find('.//duration').text) - thumbnail = config.find('.//image').text + video_url = config.find('file').text + uploader = config.find('author').text + timestamp = parse_iso8601(config.find('date').text) + duration = parse_duration(config.find('duration').text) + thumbnail = config.find('image').text media = self._download_xml( 'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML') - title = media.find('.//title').text + title = media.find('title').text return { 'id': video_id, diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py index bebfe8568..53714f47f 100644 --- a/youtube_dl/extractor/golem.py +++ b/youtube_dl/extractor/golem.py @@ -38,11 +38,9 @@ class GolemIE(InfoExtractor): } formats = [] - for e in config.findall('./*[url]'): + for e in config: url = e.findtext('./url') if not url: - self._downloader.report_warning( - "{0}: url: empty, skipping".format(e.tag)) continue formats.append({ @@ -57,7 +55,7 @@ class GolemIE(InfoExtractor): info['formats'] = formats thumbnails = [] - for e in config.findall('.//teaser[url]'): + for e in config.findall('.//teaser'): url = e.findtext('./url') if not url: continue diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 12e9e61c4..c80185b53 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -89,7 +89,12 @@ class IGNIE(InfoExtractor): ']*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', 
webpage) if multiple_urls: - return [self.url_result(u, ie='IGN') for u in multiple_urls] + entries = [self.url_result(u, ie='IGN') for u in multiple_urls] + return { + '_type': 'playlist', + 'id': name_or_id, + 'entries': entries, + } video_id = self._find_video_id(webpage) result = self._get_video_info(video_id) diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 4ddda2f1b..53f9a5f75 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -12,12 +14,13 @@ class InternetVideoArchiveIE(InfoExtractor): _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?' _TEST = { - u'url': u'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247', - u'file': u'452693.mp4', - u'info_dict': { - u'title': u'SKYFALL', - u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.', - u'duration': 153, + 'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247', + 'info_dict': { + 'id': '452693', + 'ext': 'mp4', + 'title': 'SKYFALL', + 'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. 
As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.', + 'duration': 149, }, } @@ -42,7 +45,7 @@ class InternetVideoArchiveIE(InfoExtractor): url = self._build_url(query) flashconfiguration = self._download_xml(url, video_id, - u'Downloading flash configuration') + 'Downloading flash configuration') file_url = flashconfiguration.find('file').text file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') # Replace some of the parameters in the query to get the best quality @@ -51,7 +54,7 @@ class InternetVideoArchiveIE(InfoExtractor): lambda m: self._clean_query(m.group()), file_url) info = self._download_xml(file_url, video_id, - u'Downloading video info') + 'Downloading video info') item = info.find('channel/item') def _bp(p): diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py index a83dd249f..07ef682ee 100644 --- a/youtube_dl/extractor/izlesene.py +++ b/youtube_dl/extractor/izlesene.py @@ -63,7 +63,8 @@ class IzleseneIE(InfoExtractor): title = self._og_search_title(webpage) description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage) + thumbnail = self._proto_relative_url( + self._og_search_thumbnail(webpage), scheme='http:') uploader = self._html_search_regex( r"adduserUsername\s*=\s*'([^']+)';", diff --git a/youtube_dl/extractor/jukebox.py b/youtube_dl/extractor/jukebox.py index 9b553b9fa..5aa32bf09 100644 --- a/youtube_dl/extractor/jukebox.py +++ b/youtube_dl/extractor/jukebox.py @@ -11,10 +11,9 @@ from ..utils import ( class JukeboxIE(InfoExtractor): - _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P[a-z0-9\-]+)\.html' + _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P[a-z0-9\-]+)\.html' _TEST = { 'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html', - 'md5': '1574e9b4d6438446d5b7dbcdf2786276', 'info_dict': { 'id': 'r303r', 'ext': 'flv', @@ -24,8 +23,7 @@ class JukeboxIE(InfoExtractor): } def _real_extract(self, 
url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = self._match_id(url) html = self._download_webpage(url, video_id) iframe_url = unescapeHTML(self._search_regex(r'