From: Philipp Hagemeister Date: Mon, 9 Dec 2013 03:49:32 +0000 (+0100) Subject: Merge remote-tracking branch 'jaimeMF/yt-toplists' X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=ffa8f0df0a878463078467709f615b1e57c61ec1;hp=0a688bc0b28c970e9af965b3fa0c7927507eeb97;p=youtube-dl Merge remote-tracking branch 'jaimeMF/yt-toplists' --- diff --git a/README.md b/README.md index af4d969d6..9d4835053 100644 --- a/README.md +++ b/README.md @@ -30,13 +30,16 @@ which means you can modify it, redistribute it or use it however you like. --list-extractors List all supported extractors and the URLs they would handle --extractor-descriptions Output descriptions of all supported extractors - --proxy URL Use the specified HTTP/HTTPS proxy + --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an + empty string (--proxy "") for direct connection --no-check-certificate Suppress HTTPS certificate validation. --cache-dir DIR Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache /youtube-dl . --no-cache-dir Disable filesystem caching + --bidi-workaround Work around terminals that lack bidirectional + text support. Requires fribidi executable in PATH ## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) @@ -55,8 +58,9 @@ which means you can modify it, redistribute it or use it however you like. --dateafter DATE download only videos uploaded after this date --no-playlist download only the currently playing video --age-limit YEARS download only videos suitable for the given age - --download-archive FILE Download only videos not present in the archive - file. Record all downloaded videos in it. + --download-archive FILE Download only videos not listed in the archive + file. Record the IDs of all downloaded videos in + it. ## Download Options: -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. @@ -130,8 +134,8 @@ which means you can modify it, redistribute it or use it however you like. -v, --verbose print various debugging information --dump-intermediate-pages print downloaded pages to debug problems(very verbose) - --write-pages Write downloaded pages to files in the current - directory + --write-pages Write downloaded intermediary pages to files in + the current directory to debug problems ## Video Format Options: -f, --format FORMAT video format code, specify the order of @@ -182,7 +186,7 @@ which means you can modify it, redistribute it or use it however you like. # CONFIGURATION -You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. +You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\\youtube-dl.conf`. # OUTPUT TEMPLATE @@ -272,14 +276,54 @@ This README file was originally written by Daniel Bolton ( +Bugs and suggestions should be reported at: . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. -Please include: - -* Your exact command line, like `youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem. -* If possible re-run the command with `--verbose`, and include the full output, it is really helpful to us. -* The output of `youtube-dl --version` -* The output of `python --version` -* The name and version of your Operating System ("Ubuntu 11.04 x64" or "Windows 7 x64" is usually enough). +Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. For discussions, join us in the irc channel #youtube-dl on freenode. + +When you submit a request, please re-read it once to avoid a couple of mistakes (you can and should use this as a checklist): + +### Is the description of the issue itself sufficient? + +We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts. + +So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious + +- What the problem is +- How it could be fixed +- How your proposed solution would look like + +If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. + +For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. + +Site support requests must contain an example URL. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL. + +### Are you using the latest version? + +Before reporting any issue, type youtube-dl -U. This should report that you're up-to-date. Ábout 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well. + +### Is the issue already documented? + +Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or at https://github.com/rg3/youtube-dl/search?type=Issues . If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. + +### Why are existing options not enough? + +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. + +### Is there enough context in your bug report? + +People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: One simple, and one impossible (or extremely complicated one). + +We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful. + +### Does the issue involve one problem, and one problem only? + +Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones. + +In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service. + +### Is anyone going to need the feature? + +Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. diff --git a/test/parameters.json b/test/parameters.json index f042880ed..487a46d56 100644 --- a/test/parameters.json +++ b/test/parameters.json @@ -39,5 +39,6 @@ "writeinfojson": true, "writesubtitles": false, "allsubtitles": false, - "listssubtitles": false + "listssubtitles": false, + "socket_timeout": 20 } diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 1f1adb6b4..6b9764c67 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -106,6 +106,10 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch(':colbertreport', ['ComedyCentralShows']) self.assertMatch(':cr', ['ComedyCentralShows']) + def test_vimeo_matching(self): + self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel']) + self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user']) + if __name__ == '__main__': unittest.main() diff --git a/test/test_playlists.py b/test/test_playlists.py index 167801ae2..87ca401e5 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -15,13 +15,18 @@ from youtube_dl.extractor import ( DailymotionPlaylistIE, DailymotionUserIE, VimeoChannelIE, + VimeoUserIE, + VimeoAlbumIE, + VimeoGroupsIE, UstreamChannelIE, SoundcloudSetIE, SoundcloudUserIE, LivestreamIE, NHLVideocenterIE, BambuserChannelIE, - BandcampAlbumIE + BandcampAlbumIE, + SmotriCommunityIE, + SmotriUserIE ) @@ -54,6 +59,30 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['title'], u'Vimeo Tributes') self.assertTrue(len(result['entries']) > 24) + def test_vimeo_user(self): + dl = FakeYDL() + ie = VimeoUserIE(dl) + result = ie.extract('http://vimeo.com/nkistudio/videos') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], u'Nki') + self.assertTrue(len(result['entries']) > 65) + + def test_vimeo_album(self): + dl = FakeYDL() + ie = VimeoAlbumIE(dl) + result = ie.extract('http://vimeo.com/album/2632481') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], u'Staff Favorites: November 2013') + self.assertTrue(len(result['entries']) > 12) + + def test_vimeo_groups(self): + dl = FakeYDL() + ie = VimeoGroupsIE(dl) + result = ie.extract('http://vimeo.com/groups/rolexawards') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], u'Rolex Awards for Enterprise') + self.assertTrue(len(result['entries']) > 72) + def test_ustream_channel(self): dl = FakeYDL() ie = UstreamChannelIE(dl) @@ -110,6 +139,24 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['title'], u'Nightmare Night EP') self.assertTrue(len(result['entries']) >= 4) + + def test_smotri_community(self): + dl = FakeYDL() + ie = SmotriCommunityIE(dl) + result = ie.extract('http://smotri.com/community/video/kommuna') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], u'kommuna') + self.assertEqual(result['title'], u'КПРФ') + self.assertTrue(len(result['entries']) >= 4) + + def test_smotri_user(self): + dl = FakeYDL() + ie = SmotriUserIE(dl) + result = ie.extract('http://smotri.com/user/inspector') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], u'inspector') + self.assertEqual(result['title'], u'Inspector') + self.assertTrue(len(result['entries']) >= 9) if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index e9e590e74..0fa66beec 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -26,6 +26,7 @@ from youtube_dl.utils import ( unsmuggle_url, shell_quote, encodeFilename, + str_to_int, ) if sys.version_info < (3, 0): @@ -176,6 +177,10 @@ class TestUtil(unittest.TestCase): args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')] self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""") + def test_str_to_int(self): + self.assertEqual(str_to_int('123,456'), 123456) + self.assertEqual(str_to_int('123.456'), 123456) + if __name__ == '__main__': unittest.main() diff --git a/test/test_write_info_json.py b/test/test_write_info_json.py index d7177611b..90426a559 100644 --- a/test/test_write_info_json.py +++ b/test/test_write_info_json.py @@ -33,6 +33,7 @@ TEST_ID = 'BaW_jenozKc' INFO_JSON_FILE = TEST_ID + '.info.json' DESCRIPTION_FILE = TEST_ID + '.mp4.description' EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐 +test URL: https://github.com/rg3/youtube-dl/issues/1892 This is a test video for youtube-dl. diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 3ff9716b3..47124932f 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -204,11 +204,27 @@ class FileDownloader(object): """Report destination filename.""" self.to_screen(u'[download] Destination: ' + filename) + def _report_progress_status(self, msg, is_last_line=False): + fullmsg = u'[download] ' + msg + if self.params.get('progress_with_newline', False): + self.to_screen(fullmsg) + else: + if os.name == 'nt': + prev_len = getattr(self, '_report_progress_prev_line_length', + 0) + if prev_len > len(fullmsg): + fullmsg += u' ' * (prev_len - len(fullmsg)) + self._report_progress_prev_line_length = len(fullmsg) + clear_line = u'\r' + else: + clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r') + self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) + self.to_console_title(u'youtube-dl ' + msg) + def report_progress(self, percent, data_len_str, speed, eta): """Report download progress.""" if self.params.get('noprogress', False): return - clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'') if eta is not None: eta_str = self.format_eta(eta) else: @@ -218,14 +234,29 @@ class FileDownloader(object): else: percent_str = 'Unknown %' speed_str = self.format_speed(speed) - if self.params.get('progress_with_newline', False): - self.to_screen(u'[download] %s of %s at %s ETA %s' % - (percent_str, data_len_str, speed_str, eta_str)) + + msg = (u'%s of %s at %s ETA %s' % + (percent_str, data_len_str, speed_str, eta_str)) + self._report_progress_status(msg) + + def report_progress_live_stream(self, downloaded_data_len, speed, elapsed): + if self.params.get('noprogress', False): + return + downloaded_str = format_bytes(downloaded_data_len) + speed_str = self.format_speed(speed) + elapsed_str = FileDownloader.format_seconds(elapsed) + msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str) + self._report_progress_status(msg) + + def report_finish(self, data_len_str, tot_time): + """Report download finished.""" + if self.params.get('noprogress', False): + self.to_screen(u'[download] Download completed') else: - self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' % - (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True) - self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' % - (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip())) + self._report_progress_status( + (u'100%% of %s in %s' % + (data_len_str, self.format_seconds(tot_time))), + is_last_line=True) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" @@ -246,16 +277,7 @@ class FileDownloader(object): """Report it was impossible to resume download.""" self.to_screen(u'[download] Unable to resume') - def report_finish(self, data_len_str, tot_time): - """Report download finished.""" - if self.params.get('noprogress', False): - self.to_screen(u'[download] Download completed') - else: - clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'') - self.to_screen(u'\r%s[download] 100%% of %s in %s' % - (clear_line, data_len_str, self.format_seconds(tot_time))) - - def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live): + def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live, conn): def run_rtmpdump(args): start = time.time() resume_percent = None @@ -301,11 +323,27 @@ class FileDownloader(object): 'eta': eta, 'speed': speed, }) - elif self.params.get('verbose', False): - if not cursor_in_new_line: - self.to_screen(u'') - cursor_in_new_line = True - self.to_screen(u'[rtmpdump] '+line) + else: + # no percent for live streams + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1))*1024) + time_now = time.time() + speed = self.calc_speed(start, time_now, downloaded_data_len) + self.report_progress_live_stream(downloaded_data_len, speed, time_now - start) + cursor_in_new_line = False + self._hook_progress({ + 'downloaded_bytes': downloaded_data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'speed': speed, + }) + elif self.params.get('verbose', False): + if not cursor_in_new_line: + self.to_screen(u'') + cursor_in_new_line = True + self.to_screen(u'[rtmpdump] '+line) proc.wait() if not cursor_in_new_line: self.to_screen(u'') @@ -338,6 +376,8 @@ class FileDownloader(object): basic_args += ['--stop', '1'] if live: basic_args += ['--live'] + if conn: + basic_args += ['--conn', conn] args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] if sys.platform == 'win32' and sys.version_info < (3, 0): @@ -479,7 +519,8 @@ class FileDownloader(object): info_dict.get('page_url', None), info_dict.get('play_path', None), info_dict.get('tc_url', None), - info_dict.get('rtmp_live', False)) + info_dict.get('rtmp_live', False), + info_dict.get('rtmp_conn', None)) # Attempt to download using mplayer if url.startswith('mms') or url.startswith('rtsp'): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b822930cb..3fbbf3ba0 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -132,6 +132,9 @@ class YoutubeDL(object): cookiefile: File name where cookies should be read from and dumped to. nocheckcertificate:Do not verify SSL certificates proxy: URL of the proxy server to use + socket_timeout: Time to wait for unresponsive hosts, in seconds + bidi_workaround: Work around buggy terminals without bidirectional text + support, using fridibi The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -155,8 +158,45 @@ class YoutubeDL(object): self._download_retcode = 0 self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] + self._err_file = sys.stderr self.params = {} if params is None else params + # Pipe messsages through fribidi + if params.get('bidi_workaround', False): + # fribidi does not support ungetting, so force newlines + params['progress_with_newline'] = True + + for fid in ['_screen_file', '_err_file']: + class FribidiOut(object): + def __init__(self, outfile, errfile): + self.outfile = outfile + self.process = subprocess.Popen( + ['fribidi'], + stdin=subprocess.PIPE, + stdout=outfile, + stderr=errfile) + + def write(self, s): + res = self.process.stdin.write(s) + self.flush() + return res + + def flush(self): + return self.process.stdin.flush() + + def isatty(self): + return self.outfile.isatty() + + try: + vout = FribidiOut(getattr(self, fid), self._err_file) + setattr(self, fid, vout) + except OSError as ose: + if ose.errno == 2: + self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') + break + else: + raise + if (sys.version_info >= (3,) and sys.platform != 'win32' and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and not params['restrictfilenames']): @@ -205,10 +245,14 @@ class YoutubeDL(object): pp.set_downloader(self) def to_screen(self, message, skip_eol=False): + """Print message to stdout if not in quiet mode.""" + return self.to_stdout(message, skip_eol, check_quiet=True) + + def to_stdout(self, message, skip_eol=False, check_quiet=False): """Print message to stdout if not in quiet mode.""" if self.params.get('logger'): self.params['logger'].debug(message) - elif not self.params.get('quiet', False): + elif not check_quiet or not self.params.get('quiet', False): terminator = [u'\n', u''][skip_eol] output = message + terminator write_string(output, self._screen_file) @@ -220,9 +264,7 @@ class YoutubeDL(object): self.params['logger'].error(message) else: output = message + u'\n' - if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr - output = output.encode(preferredencoding()) - sys.stderr.write(output) + write_string(output, self._err_file) def to_console_title(self, message): if not self.params.get('consoletitle', False): @@ -293,7 +335,7 @@ class YoutubeDL(object): Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored ''' - if sys.stderr.isatty() and os.name != 'nt': + if self._err_file.isatty() and os.name != 'nt': _msg_header = u'\033[0;33mWARNING:\033[0m' else: _msg_header = u'WARNING:' @@ -305,7 +347,7 @@ class YoutubeDL(object): Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. ''' - if sys.stderr.isatty() and os.name != 'nt': + if self._err_file.isatty() and os.name != 'nt': _msg_header = u'\033[0;31mERROR:\033[0m' else: _msg_header = u'ERROR:' @@ -404,7 +446,8 @@ class YoutubeDL(object): for key, value in extra_info.items(): info_dict.setdefault(key, value) - def extract_info(self, url, download=True, ie_key=None, extra_info={}): + def extract_info(self, url, download=True, ie_key=None, extra_info={}, + process=True): ''' Returns a list with a dictionary for each video we find. If 'download', also downloads the videos. @@ -440,7 +483,10 @@ class YoutubeDL(object): 'webpage_url': url, 'extractor_key': ie.ie_key(), }) - return self.process_ie_result(ie_result, download, extra_info) + if process: + return self.process_ie_result(ie_result, download, extra_info) + else: + return ie_result except ExtractorError as de: # An error we somewhat expected self.report_error(compat_str(de), de.format_traceback()) break @@ -473,8 +519,33 @@ class YoutubeDL(object): download, ie_key=ie_result.get('ie_key'), extra_info=extra_info) + elif result_type == 'url_transparent': + # Use the information from the embedding page + info = self.extract_info( + ie_result['url'], ie_key=ie_result.get('ie_key'), + extra_info=extra_info, download=False, process=False) + + def make_result(embedded_info): + new_result = ie_result.copy() + for f in ('_type', 'url', 'ext', 'player_url', 'formats', + 'entries', 'urlhandle', 'ie_key', 'duration', + 'subtitles', 'annotations', 'format', + 'thumbnail', 'thumbnails'): + if f in new_result: + del new_result[f] + if f in embedded_info: + new_result[f] = embedded_info[f] + return new_result + new_result = make_result(info) + + assert new_result.get('_type') != 'url_transparent' + if new_result.get('_type') == 'compat_list': + new_result['entries'] = [ + make_result(e) for e in new_result['entries']] + + return self.process_ie_result( + new_result, download=download, extra_info=extra_info) elif result_type == 'playlist': - # We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) self.to_screen(u'[download] Downloading playlist: %s' % playlist) @@ -665,22 +736,23 @@ class YoutubeDL(object): # Forced printings if self.params.get('forcetitle', False): - compat_print(info_dict['fulltitle']) + self.to_stdout(info_dict['fulltitle']) if self.params.get('forceid', False): - compat_print(info_dict['id']) + self.to_stdout(info_dict['id']) if self.params.get('forceurl', False): # For RTMP URLs, also include the playpath - compat_print(info_dict['url'] + info_dict.get('play_path', u'')) + self.to_stdout(info_dict['url'] + info_dict.get('play_path', u'')) if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: - compat_print(info_dict['thumbnail']) + self.to_stdout(info_dict['thumbnail']) if self.params.get('forcedescription', False) and info_dict.get('description') is not None: - compat_print(info_dict['description']) + self.to_stdout(info_dict['description']) if self.params.get('forcefilename', False) and filename is not None: - compat_print(filename) + self.to_stdout(filename) if self.params.get('forceformat', False): - compat_print(info_dict['format']) + self.to_stdout(info_dict['format']) if self.params.get('forcejson', False): - compat_print(json.dumps(info_dict)) + info_dict['_filename'] = filename + self.to_stdout(json.dumps(info_dict)) # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -969,7 +1041,10 @@ class YoutubeDL(object): proxy_map.update(handler.proxies) write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n') - def _setup_opener(self, timeout=20): + def _setup_opener(self): + timeout_val = self.params.get('socket_timeout') + timeout = 600 if timeout_val is None else float(timeout_val) + opts_cookiefile = self.params.get('cookiefile') opts_proxy = self.params.get('proxy') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 92e583744..2e3f96919 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -36,6 +36,7 @@ __authors__ = ( 'Marcin Cieślak', 'Anton Larionov', 'Takuya Tsuchida', + 'Sergey M.', ) __license__ = 'Public Domain' @@ -80,11 +81,11 @@ from .PostProcessor import ( def parseOpts(overrideArguments=None): - def _readOptions(filename_bytes): + def _readOptions(filename_bytes, default=[]): try: optionf = open(filename_bytes) except IOError: - return [] # silently skip if file is not present + return default # silently skip if file is not present try: res = [] for l in optionf: @@ -190,7 +191,9 @@ def parseOpts(overrideArguments=None): general.add_option('--extractor-descriptions', action='store_true', dest='list_extractor_descriptions', help='Output descriptions of all supported extractors', default=False) - general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') + general.add_option( + '--proxy', dest='proxy', default=None, metavar='URL', + help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', @@ -198,6 +201,12 @@ def parseOpts(overrideArguments=None): general.add_option( '--no-cache-dir', action='store_const', const=None, dest='cachedir', help='Disable filesystem caching') + general.add_option( + '--socket-timeout', dest='socket_timeout', + type=float, default=None, help=optparse.SUPPRESS_HELP) + general.add_option( + '--bidi-workaround', dest='bidi_workaround', action='store_true', + help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH') selection.add_option('--playlist-start', @@ -220,7 +229,7 @@ def parseOpts(overrideArguments=None): default=None, type=int) selection.add_option('--download-archive', metavar='FILE', dest='download_archive', - help='Download only videos not present in the archive file. Record the IDs of all downloaded videos in it.') + help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.') authentication.add_option('-u', '--username', @@ -415,6 +424,8 @@ def parseOpts(overrideArguments=None): if opts.verbose: write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') else: + systemConf = _readOptions('/etc/youtube-dl.conf') + xdg_config_home = os.environ.get('XDG_CONFIG_HOME') if xdg_config_home: userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') @@ -424,8 +435,31 @@ def parseOpts(overrideArguments=None): userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') if not os.path.isfile(userConfFile): userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') - systemConf = _readOptions('/etc/youtube-dl.conf') - userConf = _readOptions(userConfFile) + userConf = _readOptions(userConfFile, None) + + if userConf is None: + appdata_dir = os.environ.get('appdata') + if appdata_dir: + userConf = _readOptions( + os.path.join(appdata_dir, 'youtube-dl', 'config'), + default=None) + if userConf is None: + userConf = _readOptions( + os.path.join(appdata_dir, 'youtube-dl', 'config.txt'), + default=None) + + if userConf is None: + userConf = _readOptions( + os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'), + default=None) + if userConf is None: + userConf = _readOptions( + os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'), + default=None) + + if userConf is None: + userConf = [] + commandLineConf = sys.argv[1:] argv = systemConf + userConf + commandLineConf opts, args = parser.parse_args(argv) @@ -652,6 +686,8 @@ def _real_main(argv=None): 'cookiefile': opts.cookiefile, 'nocheckcertificate': opts.no_check_certificate, 'proxy': opts.proxy, + 'socket_timeout': opts.socket_timeout, + 'bidi_workaround': opts.bidi_workaround, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0abf86e44..3f740baa1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -8,6 +8,7 @@ from .arte import ( ArteTVPlus7IE, ArteTVCreativeIE, ArteTVFutureIE, + ArteTVDDCIE, ) from .auengine import AUEngineIE from .bambuser import BambuserIE, BambuserChannelIE @@ -56,7 +57,7 @@ from .flickr import FlickrIE from .francetv import ( PluzzIE, FranceTvInfoIE, - France2IE, + FranceTVIE, GenerationQuoiIE ) from .freesound import FreesoundIE @@ -102,6 +103,7 @@ from .nbc import NBCNewsIE from .newgrounds import NewgroundsIE from .nhl import NHLIE, NHLVideocenterIE from .niconico import NiconicoIE +from .ninegag import NineGagIE from .nowvideo import NowVideoIE from .ooyala import OoyalaIE from .orf import ORFIE @@ -110,6 +112,7 @@ from .photobucket import PhotobucketIE from .podomatic import PodomaticIE from .pornhub import PornHubIE from .pornotube import PornotubeIE +from .pyvideo import PyvideoIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE from .ringtv import RingTVIE @@ -121,6 +124,12 @@ from .rutube import RutubeIE from .sina import SinaIE from .slashdot import SlashdotIE from .slideshare import SlideshareIE +from .smotri import ( + SmotriIE, + SmotriCommunityIE, + SmotriUserIE, + SmotriBroadcastIE, +) from .sohu import SohuIE from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE from .southparkstudios import ( @@ -139,6 +148,7 @@ from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE from .tf1 import TF1IE +from .theplatform import ThePlatformIE from .thisav import ThisAVIE from .toutv import TouTvIE from .traileraddict import TrailerAddictIE @@ -159,7 +169,13 @@ from .viddler import ViddlerIE from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videopremium import VideoPremiumIE -from .vimeo import VimeoIE, VimeoChannelIE +from .vimeo import ( + VimeoIE, + VimeoChannelIE, + VimeoUserIE, + VimeoAlbumIE, + VimeoGroupsIE, +) from .vine import VineIE from .viki import VikiIE from .vk import VKIE @@ -167,6 +183,7 @@ from .wat import WatIE from .websurg import WeBSurgIE from .weibo import WeiboIE from .wimp import WimpIE +from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE from .xhamster import XHamsterIE from .xnxx import XNXXIE diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index b99d4b966..a3a1b999d 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -13,7 +13,7 @@ from ..utils import ( class AddAnimeIE(InfoExtractor): - _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P[\w_]+)(?:.*)' + _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P[\w_]+)(?:.*)' IE_NAME = u'AddAnime' _TEST = { u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 6d6237f8a..a527f10de 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -10,7 +10,7 @@ from ..utils import ( class AppleTrailersIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P[^/]+)/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P[^/]+)/(?P[^/]+)' _TEST = { u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/", u"playlist": [ @@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor): }) formats = sorted(formats, key=lambda f: (f['height'], f['width'])) - info = { + playlist.append({ '_type': 'video', 'id': video_id, 'title': title, @@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor): 'upload_date': upload_date, 'uploader_id': uploader_id, 'user_agent': 'QuickTime compatible (youtube-dl)', - } - # TODO: Remove when #980 has been merged - info['url'] = formats[-1]['url'] - info['ext'] = formats[-1]['ext'] - - playlist.append(info) + }) return { '_type': 'playlist', diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index 61ce4469a..8bb546410 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -11,7 +11,7 @@ from ..utils import ( class ArchiveOrgIE(InfoExtractor): IE_NAME = 'archive.org' IE_DESC = 'archive.org videos' - _VALID_URL = r'(?:https?://)?(?:www\.)?archive.org/details/(?P[^?/]+)(?:[?].*)?$' + _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P[^?/]+)(?:[?].*)?$' _TEST = { u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv', @@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor): for f in formats: f['ext'] = determine_ext(f['url']) - info = { + return { '_type': 'video', 'id': video_id, 'title': title, @@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor): 'description': description, 'uploader': uploader, 'upload_date': upload_date, + 'thumbnail': data.get('misc', {}).get('image'), } - thumbnail = data.get('misc', {}).get('image') - if thumbnail: - info['thumbnail'] = thumbnail - - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - - return info diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 8b62ee774..4b7bef775 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -10,6 +10,7 @@ from ..utils import ( determine_ext, get_element_by_id, compat_str, + get_element_by_attribute, ) # There are different sources of video in arte.tv, the extraction process @@ -17,8 +18,8 @@ from ..utils import ( # add tests. class ArteTvIE(InfoExtractor): - _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?Pfr|de)/.*-(?P.*?).html' - _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?Pfr|de)/(?P.+?)/(?P.+)' + _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?Pfr|de)/.*-(?P.*?)\.html' + _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?Pfr|de)/(?P.+?)/(?P.+)' _LIVE_URL = r'index-[0-9]+\.html$' IE_NAME = u'arte.tv' @@ -142,7 +143,9 @@ class ArteTVPlus7IE(InfoExtractor): def _extract_from_webpage(self, webpage, video_id, lang): json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') + return self._extract_from_json_url(json_url, video_id, lang) + def _extract_from_json_url(self, json_url, video_id, lang): json_info = self._download_webpage(json_url, video_id, 'Downloading info json') self.report_extraction(video_id) info = json.loads(json_info) @@ -257,3 +260,35 @@ class ArteTVFutureIE(ArteTVPlus7IE): webpage = self._download_webpage(url, anchor_id) row = get_element_by_id(anchor_id, webpage) return self._extract_from_webpage(row, anchor_id, lang) + + +class ArteTVDDCIE(ArteTVPlus7IE): + IE_NAME = u'arte.tv:ddc' + _VALID_URL = r'http?://ddc\.arte\.tv/(?Pemission|folge)/(?P.+)' + + _TEST = { + u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien', + u'file': u'049881-009_PLUS7-D.flv', + u'info_dict': { + u'title': u'Mit offenen Karten', + u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6', + u'upload_date': u'20131207', + }, + u'params': { + # rtmp download + u'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id, lang = self._extract_url_info(url) + if lang == 'folge': + lang = 'de' + elif lang == 'emission': + lang = 'fr' + webpage = self._download_webpage(url, video_id) + scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage) + script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url') + javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator') + json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url') + return self._extract_from_json_url(json_url, video_id, lang) diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 95c038003..bcccc0b7a 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -16,7 +16,7 @@ class AUEngineIE(InfoExtractor): u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]" } } - _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?' + _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index b80508efe..d48c0c38d 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -54,7 +54,7 @@ class BambuserIE(InfoExtractor): class BambuserChannelIE(InfoExtractor): IE_NAME = u'bambuser:channel' - _VALID_URL = r'http://bambuser.com/channel/(?P.*?)(?:/|#|\?|$)' + _VALID_URL = r'https?://bambuser\.com/channel/(?P.*?)(?:/|#|\?|$)' # The maximum number we can get with each request _STEP = 50 diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 493504f75..5e33a69df 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -51,8 +51,7 @@ class BlipTVIE(InfoExtractor): url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id') urlp = compat_urllib_parse_urlparse(url) if urlp.path.startswith('/play/'): - request = compat_urllib_request.Request(url) - response = compat_urllib_request.urlopen(request) + response = self._request_webpage(url, None, False) redirecturl = response.geturl() rurlp = compat_urllib_parse_urlparse(redirecturl) file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2] @@ -69,25 +68,23 @@ class BlipTVIE(InfoExtractor): request.add_header('User-Agent', 'iTunes/10.6.1') self.report_extraction(mobj.group(1)) info = None - try: - urlh = compat_urllib_request.urlopen(request) - if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download - basename = url.split('/')[-1] - title,ext = os.path.splitext(basename) - title = title.decode('UTF-8') - ext = ext.replace('.', '') - self.report_direct_download(title) - info = { - 'id': title, - 'url': url, - 'uploader': None, - 'upload_date': None, - 'title': title, - 'ext': ext, - 'urlhandle': urlh - } - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) + urlh = self._request_webpage(request, None, False, + u'unable to download video info webpage') + if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download + basename = url.split('/')[-1] + title,ext = os.path.splitext(basename) + title = title.decode('UTF-8') + ext = ext.replace('.', '') + self.report_direct_download(title) + info = { + 'id': title, + 'url': url, + 'uploader': None, + 'upload_date': None, + 'title': title, + 'ext': ext, + 'urlhandle': urlh + } if info is None: # Regular URL try: json_code_bytes = urlh.read() diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 3666a780b..755d9c9ef 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class BloombergIE(InfoExtractor): - _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P.+?).html' + _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P.+?)\.html' _TEST = { u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index 0d18e9a7a..43efb08bf 100644 --- a/youtube_dl/extractor/clipfish.py +++ b/youtube_dl/extractor/clipfish.py @@ -17,7 +17,8 @@ class ClipfishIE(InfoExtractor): u'info_dict': { u'title': u'FIFA 14 - E3 2013 Trailer', u'duration': 82, - } + }, + u'skip': 'Blocked in the US' } def _real_extract(self, url): diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 23647f99e..a54ce3ee7 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -1,7 +1,7 @@ import re from .common import InfoExtractor -from .mtv import MTVIE, _media_xml_tag +from .mtv import MTVServicesInfoExtractor from ..utils import ( compat_str, compat_urllib_parse, @@ -11,8 +11,8 @@ from ..utils import ( ) -class ComedyCentralIE(MTVIE): - _VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P.*)' +class ComedyCentralIE(MTVServicesInfoExtractor): + _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)' _FEED_URL = u'http://comedycentral.com/feeds/mrss/' _TEST = { @@ -25,12 +25,6 @@ class ComedyCentralIE(MTVIE): u'description': u'After a certain point, breastfeeding becomes c**kblocking.', }, } - # Overwrite MTVIE properties we don't want - _TESTS = [] - - def _get_thumbnail_url(self, uri, itemdoc): - search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) - return itemdoc.find(search_path).attrib['url'] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -197,7 +191,7 @@ class ComedyCentralShowsIE(InfoExtractor): }) effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1) - info = { + results.append({ 'id': shortMediaId, 'formats': formats, 'uploader': showId, @@ -205,11 +199,6 @@ class ComedyCentralShowsIE(InfoExtractor): 'title': effTitle, 'thumbnail': None, 'description': compat_str(officialTitle), - } - - # TODO: Remove when #980 has been merged - info.update(info['formats'][-1]) - - results.append(info) + }) return results diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4f1b50880..534908a2b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -55,6 +55,9 @@ class InfoExtractor(object): subtitles: The subtitle file contents as a dictionary in the format {language: subtitles}. view_count: How many users have watched the video on the platform. + like_count: Number of positive ratings of the video + dislike_count: Number of negative ratings of the video + comment_count: Number of comments on the video urlhandle: [internal] The urlHandle to be used to download the file, like returned by urllib.request.urlopen age_limit: Age restriction for the video, as an integer (years) @@ -151,27 +154,38 @@ class InfoExtractor(object): def IE_NAME(self): return type(self).__name__[:-2] - def _request_webpage(self, url_or_request, video_id, note=None, errnote=None): + def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): """ Returns the response handle """ if note is None: self.report_download_webpage(video_id) elif note is not False: - self.to_screen(u'%s: %s' % (video_id, note)) + if video_id is None: + self.to_screen(u'%s' % (note,)) + else: + self.to_screen(u'%s: %s' % (video_id, note)) try: return self._downloader.urlopen(url_or_request) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: if errnote is None: errnote = u'Unable to download webpage' - raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err) + errmsg = u'%s: %s' % (errnote, compat_str(err)) + if fatal: + raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) + else: + self._downloader.report_warning(errmsg) + return False - def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None): + def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): """ Returns a tuple (page content as string, URL handle) """ # Strip hashes from the URL (#1038) if isinstance(url_or_request, (compat_str, str)): url_or_request = url_or_request.partition('#')[0] - urlh = self._request_webpage(url_or_request, video_id, note, errnote) + urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal) + if urlh is False: + assert not fatal + return False content_type = urlh.headers.get('Content-Type', '') webpage_bytes = urlh.read() m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) @@ -206,9 +220,14 @@ class InfoExtractor(object): content = webpage_bytes.decode(encoding, 'replace') return (content, urlh) - def _download_webpage(self, url_or_request, video_id, note=None, errnote=None): + def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): """ Returns the data of the page as a string """ - return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] + res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) + if res is False: + return res + else: + content, _ = res + return content def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to download XML'): @@ -364,7 +383,8 @@ class InfoExtractor(object): if display_name is None: display_name = name return self._html_search_regex( - r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\']) + r'''(?ix)<meta + (?=[^>]+(?:itemprop|name|property)=["\']%s["\']) [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), html, display_name, fatal=False) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 7bf03c584..d5730684d 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -6,7 +6,7 @@ from ..utils import ( ) class CSpanIE(InfoExtractor): - _VALID_URL = r'http://www.c-spanvideo.org/program/(.*)' + _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)' _TEST = { u'url': u'http://www.c-spanvideo.org/program/HolderonV', u'file': u'315139.flv', diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 71f5e03ee..3bd0b862c 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -11,6 +11,7 @@ from ..utils import ( get_element_by_attribute, get_element_by_id, orderedSet, + str_to_int, ExtractorError, ) @@ -146,6 +147,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): self._list_available_subtitles(video_id, webpage) return + view_count = str_to_int(self._search_regex( + r'video_views_value[^>]+>([\d\.,]+)<', webpage, u'view count')) + return { 'id': video_id, 'formats': formats, @@ -155,6 +159,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): 'subtitles': video_subtitles, 'thumbnail': info['thumbnail_url'], 'age_limit': age_limit, + 'view_count': view_count, } def _get_available_subtitles(self, video_id, webpage): diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 3d1dcb793..d418ce4a8 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -28,7 +28,8 @@ class DaumIE(InfoExtractor): video_id = mobj.group(1) canonical_url = 'http://tvpot.daum.net/v/%s' % video_id webpage = self._download_webpage(canonical_url, video_id) - full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"', + full_id = self._search_regex( + r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]', webpage, u'full id') query = compat_urllib_parse.urlencode({'vid': full_id}) info = self._download_xml( @@ -56,7 +57,7 @@ class DaumIE(InfoExtractor): 'format_id': profile, }) - info = { + return { 'id': video_id, 'title': info.find('TITLE').text, 'formats': formats, @@ -65,6 +66,3 @@ class DaumIE(InfoExtractor): 'duration': int(info.find('DURATION').text), 'upload_date': info.find('REGDTTM').text[:8], } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 3cb382e12..cb7226f82 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -11,7 +11,7 @@ from ..utils import ( class DreiSatIE(InfoExtractor): IE_NAME = '3sat' - _VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' + _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _TEST = { u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", u'file': u'36983.webm', @@ -65,7 +65,7 @@ class DreiSatIE(InfoExtractor): return (qidx, prefer_http, format['video_bitrate']) formats.sort(key=_sortkey) - info = { + return { '_type': 'video', 'id': video_id, 'title': video_title, @@ -76,8 +76,3 @@ class DreiSatIE(InfoExtractor): 'uploader': video_uploader, 'upload_date': upload_date, } - - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - - return info diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index f21ef8853..88f5526b8 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -10,7 +10,7 @@ from ..utils import ( class EightTracksIE(InfoExtractor): IE_NAME = '8tracks' - _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$' + _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$' _TEST = { u"name": u"EightTracks", u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a", diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py index a51d79b08..682901d16 100644 --- a/youtube_dl/extractor/exfm.py +++ b/youtube_dl/extractor/exfm.py @@ -8,7 +8,7 @@ class ExfmIE(InfoExtractor): IE_NAME = u'exfm' IE_DESC = u'ex.fm' _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)' - _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream' + _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream' _TESTS = [ { u'url': u'http://ex.fm/song/eh359', diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py index c0169de04..c6ab6952e 100644 --- a/youtube_dl/extractor/faz.py +++ b/youtube_dl/extractor/faz.py @@ -9,7 +9,7 @@ from ..utils import ( class FazIE(InfoExtractor): IE_NAME = u'faz.net' - _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html' + _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html' _TEST = { u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html', @@ -44,13 +44,10 @@ class FazIE(InfoExtractor): }) descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description') - info = { + return { 'id': video_id, 'title': self._og_search_title(webpage), 'formats': formats, 'description': descr, 'thumbnail': config.find('STILL/STILL_BIG').text, } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index dba1a8dc2..d7048c8c1 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -12,7 +12,7 @@ from ..utils import ( class FKTVIE(InfoExtractor): IE_NAME = u'fernsehkritik.tv' - _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?' + _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?' _TEST = { u'url': u'http://fernsehkritik.tv/folge-1', @@ -52,7 +52,7 @@ class FKTVIE(InfoExtractor): class FKTVPosteckeIE(InfoExtractor): IE_NAME = u'fernsehkritik.tv:postecke' - _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)' + _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)' _TEST = { u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120', u'file': u'0120.flv', diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 6e1971043..ad85bc16d 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -21,7 +21,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor): thumbnail_path = info.find('image').text return {'id': video_id, - 'ext': 'mp4', + 'ext': 'flv' if video_url.startswith('rtmp') else 'mp4', 'url': video_url, 'title': info.find('titre').text, 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path), @@ -45,7 +45,7 @@ class PluzzIE(FranceTVBaseInfoExtractor): class FranceTvInfoIE(FranceTVBaseInfoExtractor): IE_NAME = u'francetvinfo.fr' - _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html' + _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html' _TEST = { u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', @@ -66,35 +66,101 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): return self._extract_video(video_id) -class France2IE(FranceTVBaseInfoExtractor): - IE_NAME = u'france2.fr' - _VALID_URL = r'''(?x)https?://www\.france2\.fr/ +class FranceTVIE(FranceTVBaseInfoExtractor): + IE_NAME = u'francetv' + IE_DESC = u'France 2, 3, 4, 5 and Ô' + _VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/ (?: - emissions/.*?/videos/(?P<id>\d+) - | emission/(?P<key>[^/?]+) + emissions/.*?/(videos|emissions)/(?P<id>[^/?]+) + | (emissions?|jt)/(?P<key>[^/?]+) )''' - _TEST = { - u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', - u'file': u'75540104.mp4', - u'info_dict': { - u'title': u'13h15, le samedi...', - u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d', + _TESTS = [ + # france2 + { + u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', + u'file': u'75540104.mp4', + u'info_dict': { + u'title': u'13h15, le samedi...', + u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d', + }, + u'params': { + # m3u8 download + u'skip_download': True, + }, }, - u'params': { - u'skip_download': True, + # france3 + { + u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575', + u'info_dict': { + u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au', + u'ext': u'flv', + u'title': u'Le scandale du prix des médicaments', + u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce', + }, + u'params': { + # rtmp download + u'skip_download': True, + }, }, - } + # france4 + { + u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', + u'info_dict': { + u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4', + u'ext': u'flv', + u'title': u'Hero Corp Making of - Extrait 1', + u'description': u'md5:c87d54871b1790679aec1197e73d650a', + }, + u'params': { + # rtmp download + u'skip_download': True, + }, + }, + # france5 + { + u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968', + u'info_dict': { + u'id': u'92837968', + u'ext': u'mp4', + u'title': u'C à dire ?!', + u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f', + }, + u'params': { + # m3u8 download + u'skip_download': True, + }, + }, + # franceo + { + u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013', + u'info_dict': { + u'id': u'92327925', + u'ext': u'mp4', + u'title': u'Infô-Afrique', + u'description': u'md5:ebf346da789428841bee0fd2a935ea55', + }, + u'params': { + # m3u8 download + u'skip_download': True, + }, + u'skip': u'The id changes frequently', + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj.group('key'): webpage = self._download_webpage(url, mobj.group('key')) - video_id = self._html_search_regex( - r'''(?x)<div\s+class="video-player">\s* + id_res = [ + (r'''(?x)<div\s+class="video-player">\s* <a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+ - class="francetv-video-player">''', - webpage, u'video ID') + class="francetv-video-player">'''), + (r'<a id="player_direct" href="http://info\.francetelevisions' + '\.fr/\?id-video=([^"/&]+)'), + (r'<a class="video" id="ftv_player_(.+?)"'), + ] + video_id = self._html_search_regex(id_res, webpage, u'video ID') else: video_id = mobj.group('id') return self._extract_video(video_id) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index c91669b0e..a3a5251fe 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class GamekingsIE(InfoExtractor): - _VALID_URL = r'http?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' + _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' _TEST = { u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", u'file': u'20130811.mp4', diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 9645b00c3..26b7d2ae5 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor): 'format_id': q, }) - info = { + return { 'id': data_video['guid'], 'title': compat_urllib_parse.unquote(data_video['title']), 'formats': formats, 'description': get_meta_content('description', webpage), 'thumbnail': self._og_search_thumbnail(webpage), } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py index 3cc02d97e..d82a5d4b2 100644 --- a/youtube_dl/extractor/gametrailers.py +++ b/youtube_dl/extractor/gametrailers.py @@ -1,13 +1,10 @@ import re -from .mtv import MTVIE, _media_xml_tag +from .mtv import MTVServicesInfoExtractor -class GametrailersIE(MTVIE): - """ - Gametrailers use the same videos system as MTVIE, it just changes the feed - url, where the uri is and the method to get the thumbnails. - """ - _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' + +class GametrailersIE(MTVServicesInfoExtractor): + _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' _TEST = { u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4', @@ -17,15 +14,9 @@ class GametrailersIE(MTVIE): u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!', }, } - # Overwrite MTVIE properties we don't want - _TESTS = [] _FEED_URL = 'http://www.gametrailers.com/feeds/mrss' - def _get_thumbnail_url(self, uri, itemdoc): - search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) - return itemdoc.find(search_path).attrib['url'] - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 37671430a..216e03218 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -169,8 +169,13 @@ class GenericIE(InfoExtractor): # Site Name | Video Title # Video Title - Tagline | Site Name # and so on and so forth; it's just not practical - video_title = self._html_search_regex(r'<title>(.*)', - webpage, u'video title', default=u'video', flags=re.DOTALL) + video_title = self._html_search_regex( + r'(?s)(.*?)', webpage, u'video title', + default=u'video') + + # video uploader is domain name + video_uploader = self._search_regex( + r'^(?:https?://)?([^/]*)/.*', url, u'video uploader') # Look for BrightCove: bc_url = BrightcoveIE._extract_brightcove_url(webpage) @@ -188,13 +193,35 @@ class GenericIE(InfoExtractor): # Look for embedded YouTube player matches = re.findall( - r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage) + r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage) if matches: urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') for tuppl in matches] return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) + # Look for embedded Dailymotion player + matches = re.findall( + r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage) + if matches: + urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion') + for tuppl in matches] + return self.playlist_result( + urlrs, playlist_id=video_id, playlist_title=video_title) + + # Look for embedded Wistia player + match = re.search( + r']+?src=(["\'])(?P(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) + if match: + return { + '_type': 'url_transparent', + 'url': unescapeHTML(match.group('url')), + 'ie_key': 'Wistia', + 'uploader': video_uploader, + 'title': video_title, + 'id': video_id, + } + # Look for Bandcamp pages with custom domain mobj = re.search(r']*?content="(.*?bandcamp\.com.*?)"', webpage) if mobj is not None: @@ -238,14 +265,9 @@ class GenericIE(InfoExtractor): # here's a fun little line of code for you: video_id = os.path.splitext(video_id)[0] - # video uploader is domain name - video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*', - url, u'video uploader') - return { 'id': video_id, 'url': video_url, 'uploader': video_uploader, - 'upload_date': None, 'title': video_title, } diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 3798118a7..0ee74fb38 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -11,7 +11,7 @@ class HotNewHipHopIE(InfoExtractor): u'file': u'1435540.mp3', u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96', u'info_dict': { - u"title": u"Freddie Gibbs - Lay It Down" + u"title": u'Freddie Gibbs "Lay It Down"' } } diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index c52146f7d..57b79a336 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -103,7 +103,7 @@ class IGNIE(InfoExtractor): class OneUPIE(IGNIE): """Extractor for 1up.com, it uses the ign videos system.""" - _VALID_URL = r'https?://gamevideos.1up.com/(?Pvideo)/id/(?P.+)' + _VALID_URL = r'https?://gamevideos\.1up\.com/(?Pvideo)/id/(?P.+)' IE_NAME = '1up.com' _DESCRIPTION_RE = r'
(.+?)
' diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index d8e9712a7..6fb373db2 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -21,7 +21,6 @@ class ImdbIE(InfoExtractor): u'ext': u'mp4', u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb', u'description': u'md5:9061c2219254e5d14e03c25c98e96a81', - u'duration': 151, } } @@ -35,6 +34,7 @@ class ImdbIE(InfoExtractor): flags=re.MULTILINE) formats = [] for f_id, f_path in available_formats: + f_path = f_path.strip() format_page = self._download_webpage( compat_urlparse.urljoin(url, f_path), u'Downloading info for %s format' % f_id) @@ -46,7 +46,6 @@ class ImdbIE(InfoExtractor): formats.append({ 'format_id': f_id, 'url': format_info['url'], - 'height': int(info['titleObject']['encoding']['selected'][:-1]), }) return { @@ -55,5 +54,4 @@ class ImdbIE(InfoExtractor): 'formats': formats, 'description': descr, 'thumbnail': format_info['slate'], - 'duration': int(info['titleObject']['title']['duration_seconds']), } diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 213aac428..660573d02 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -3,7 +3,7 @@ import re from .common import InfoExtractor class InstagramIE(InfoExtractor): - _VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/' + _VALID_URL = r'(?:http://)?instagram\.com/p/(.*?)/' _TEST = { u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc', u'file': u'aye83DjauH.mp4', diff --git a/youtube_dl/extractor/jukebox.py b/youtube_dl/extractor/jukebox.py index c7bb234fe..592c64e1d 100644 --- a/youtube_dl/extractor/jukebox.py +++ b/youtube_dl/extractor/jukebox.py @@ -8,7 +8,7 @@ from ..utils import ( ) class JukeboxIE(InfoExtractor): - _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P[a-z0-9\-]+).html' + _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P[a-z0-9\-]+)\.html' _IFRAME = r'