From: Philipp Hagemeister Date: Fri, 22 Aug 2014 00:45:21 +0000 (+0200) Subject: Merge remote-tracking branch 'liudongmiao/patch-subtitle' X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=bd9820c937be846e0e1067395fbf133102c45e79;hp=7e660ac113b5af8f92de2bbc9579426ea3d89581;p=youtube-dl Merge remote-tracking branch 'liudongmiao/patch-subtitle' --- diff --git a/Makefile b/Makefile index c079761ef..088a9320b 100644 --- a/Makefile +++ b/Makefile @@ -6,10 +6,10 @@ clean: cleanall: clean rm -f youtube-dl youtube-dl.exe -PREFIX=/usr/local -BINDIR=$(PREFIX)/bin -MANDIR=$(PREFIX)/man -PYTHON=/usr/bin/env python +PREFIX ?= /usr/local +BINDIR ?= $(PREFIX)/bin +MANDIR ?= $(PREFIX)/man +PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local ifeq ($(PREFIX),/usr) diff --git a/README.md b/README.md index fb2f776c9..ba350b905 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,14 @@ If you do not have curl, you can alternatively use a recent wget: Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). +OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). + + brew install youtube-dl + +You can also use pip: + + sudo pip install youtube-dl + Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html . # DESCRIPTION @@ -38,12 +46,6 @@ which means you can modify it, redistribute it or use it however you like. playlist or the command line) if an error occurs --dump-user-agent display the current browser identification - --user-agent UA specify a custom user agent - --referer REF specify a custom referer, use if the video - access is restricted to one domain - --add-header FIELD:VALUE specify a custom HTTP header and its value, - separated by a colon ':'. You can use this - option multiple times --list-extractors List all supported extractors and the URLs they would handle --extractor-descriptions Output descriptions of all supported @@ -51,35 +53,22 @@ which means you can modify it, redistribute it or use it however you like. --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection - --no-check-certificate Suppress HTTPS certificate validation. - --prefer-insecure Use an unencrypted connection to retrieve - information about the video. (Currently - supported only for YouTube) - --cache-dir DIR Location in the filesystem where youtube-dl - can store some downloaded information - permanently. By default $XDG_CACHE_HOME - /youtube-dl or ~/.cache/youtube-dl . At the - moment, only YouTube player files (for - videos with obfuscated signatures) are - cached, but that may change. - --no-cache-dir Disable filesystem caching --socket-timeout None Time to wait before giving up, in seconds - --bidi-workaround Work around terminals that lack - bidirectional text support. Requires bidiv - or fribidi executable in PATH --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let - youtube-dl guess. The default value "error" - just throws an error. + youtube-dl guess ("auto_warning" to emit a + warning when guessing). "error" just throws + an error. The default value "fixup_error" + repairs broken URLs, but emits an error if + this is not possible instead of searching. --ignore-config Do not read configuration files. When given in the global configuration file /etc /youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows) - --encoding ENCODING Force the specified encoding (experimental) ## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) @@ -125,9 +114,9 @@ which means you can modify it, redistribute it or use it however you like. of SIZE. ## Filesystem Options: - -t, --title use title in file name (default) + -a, --batch-file FILE file containing URLs to download ('-' for + stdin) --id use only video ID in file name - -l, --literal [deprecated] alias of --title -A, --auto-number number downloaded files starting from 00000 -o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the @@ -160,18 +149,15 @@ which means you can modify it, redistribute it or use it however you like. --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames - -a, --batch-file FILE file containing URLs to download ('-' for - stdin) - --load-info FILE json file containing the video information - (created with the "--write-json" option) + -t, --title [deprecated] use title in file name + (default) + -l, --literal [deprecated] alias of --title -w, --no-overwrites do not overwrite files -c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible. --no-continue do not resume partially downloaded files (restart from beginning) - --cookies FILE file to read cookies from and dump cookie - jar in --no-part do not use .part files --no-mtime do not use the Last-modified header to set the file modification time @@ -181,6 +167,19 @@ which means you can modify it, redistribute it or use it however you like. --write-annotations write video annotations to a .annotation file --write-thumbnail write thumbnail image to disk + --load-info FILE json file containing the video information + (created with the "--write-json" option) + --cookies FILE file to read cookies from and dump cookie + jar in + --cache-dir DIR Location in the filesystem where youtube-dl + can store some downloaded information + permanently. By default $XDG_CACHE_HOME + /youtube-dl or ~/.cache/youtube-dl . At the + moment, only YouTube player files (for + videos with obfuscated signatures) are + cached, but that may change. + --no-cache-dir Disable filesystem caching + --rm-cache-dir Delete all filesystem cache files ## Verbosity / Simulation Options: -q, --quiet activates quiet mode @@ -210,6 +209,22 @@ which means you can modify it, redistribute it or use it however you like. problems --print-traffic Display sent and read HTTP traffic +## Workarounds: + --encoding ENCODING Force the specified encoding (experimental) + --no-check-certificate Suppress HTTPS certificate validation. + --prefer-insecure Use an unencrypted connection to retrieve + information about the video. (Currently + supported only for YouTube) + --user-agent UA specify a custom user agent + --referer REF specify a custom referer, use if the video + access is restricted to one domain + --add-header FIELD:VALUE specify a custom HTTP header and its value, + separated by a colon ':'. You can use this + option multiple times + --bidi-workaround Work around terminals that lack + bidirectional text support. Requires bidiv + or fribidi executable in PATH + ## Video Format Options: -f, --format FORMAT video format code, specify the order of preference using slashes: "-f 22/17/18". @@ -296,10 +311,12 @@ The current default template is `%(title)s-%(id)s.%(ext)s`. In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: - $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc - youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters - $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames - youtube-dl_test_video_.mp4 # A simple file name +```bash +$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc +youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters +$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames +youtube-dl_test_video_.mp4 # A simple file name +``` # VIDEO SELECTION @@ -310,14 +327,16 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb Examples: - # Download only the videos uploaded in the last 6 months - $ youtube-dl --dateafter now-6months +```bash +# Download only the videos uploaded in the last 6 months +$ youtube-dl --dateafter now-6months - # Download only the videos uploaded on January 1, 1970 - $ youtube-dl --date 19700101 +# Download only the videos uploaded on January 1, 1970 +$ youtube-dl --date 19700101 - $ # will only download the videos uploaded in the 200x decade - $ youtube-dl --dateafter 20000101 --datebefore 20091231 +$ # will only download the videos uploaded in the 200x decade +$ youtube-dl --dateafter 20000101 --datebefore 20091231 +``` # FAQ @@ -392,49 +411,48 @@ If you want to add support for a new site, you can follow this quick list (assum 2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git` 3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor` 4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`: - - # coding: utf-8 - from __future__ import unicode_literals - - import re - - from .common import InfoExtractor - - - class YourExtractorIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' - _TEST = { - 'url': 'http://yourextractor.com/watch/42', - 'md5': 'TODO: md5 sum of the first 10KiB of the video file', - 'info_dict': { - 'id': '42', - 'ext': 'mp4', - 'title': 'Video title goes here', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) - } + ```python + # coding: utf-8 + from __future__ import unicode_literals + + import re + + from .common import InfoExtractor + + + class YourExtractorIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' + _TEST = { + 'url': 'http://yourextractor.com/watch/42', + 'md5': 'TODO: md5 sum of the first 10KiB of the video file', + 'info_dict': { + 'id': '42', + 'ext': 'mp4', + 'title': 'Video title goes here', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) } + } - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - # TODO more code goes here, for example ... - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'

(.*?)

', webpage, 'title') - - return { - 'id': video_id, - 'title': title, - # TODO more properties (see youtube_dl/extractor/common.py) - } + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + # TODO more code goes here, for example ... + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'

(.*?)

', webpage, 'title') + return { + 'id': video_id, + 'title': title, + # TODO more properties (see youtube_dl/extractor/common.py) + } + ``` 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). -6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. +6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. 8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501). 9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this: diff --git a/test/helper.py b/test/helper.py index b7299fb82..22d763860 100644 --- a/test/helper.py +++ b/test/helper.py @@ -117,8 +117,9 @@ def expect_info_dict(self, expected_dict, got_dict): u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) # Check for the presence of mandatory fields - for key in ('id', 'url', 'title', 'ext'): - self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) + if got_dict.get('_type') != 'playlist': + for key in ('id', 'url', 'title', 'ext'): + self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) # Check for mandatory fields that are automatically set by YoutubeDL for key in ['webpage_url', 'extractor', 'extractor_key']: self.assertTrue(got_dict.get(key), u'Missing field: %s' % key) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 0ff47cf1e..b1ad30bf1 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase): def test_facebook_matching(self): self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) + self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793')) def test_no_duplicates(self): ies = gen_extractors() diff --git a/test/test_download.py b/test/test_download.py index d6540588c..c8d4ec2c8 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -63,15 +63,21 @@ def generator(test_case): def test_template(self): ie = youtube_dl.extractor.get_info_extractor(test_case['name']) other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])] + is_playlist = any(k.startswith('playlist') for k in test_case) + test_cases = test_case.get( + 'playlist', [] if is_playlist else [test_case]) + def print_skipping(reason): print('Skipping %s: %s' % (test_case['name'], reason)) if not ie.working(): print_skipping('IE marked as not _WORKING') return - if 'playlist' not in test_case: - info_dict = test_case.get('info_dict', {}) - if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')): + + for tc in test_cases: + info_dict = tc.get('info_dict', {}) + if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')): raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?') + if 'skip' in test_case: print_skipping(test_case['skip']) return @@ -81,6 +87,9 @@ def generator(test_case): return params = get_params(test_case.get('params', {})) + if is_playlist and 'playlist' not in test_case: + params.setdefault('extract_flat', True) + params.setdefault('skip_download', True) ydl = YoutubeDL(params) ydl.add_default_info_extractors() @@ -93,7 +102,6 @@ def generator(test_case): def get_tc_filename(tc): return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) - test_cases = test_case.get('playlist', [test_case]) def try_rm_tcs_files(): for tc in test_cases: tc_filename = get_tc_filename(tc) @@ -105,7 +113,10 @@ def generator(test_case): try_num = 1 while True: try: - ydl.download([test_case['url']]) + # We're not using .download here sine that is just a shim + # for outside error handling, and returns the exit code + # instead of the result dict. + res_dict = ydl.extract_info(test_case['url']) except (DownloadError, ExtractorError) as err: # Check if the exception is not a network related one if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): @@ -121,6 +132,17 @@ def generator(test_case): else: break + if is_playlist: + self.assertEqual(res_dict['_type'], 'playlist') + expect_info_dict(self, test_case.get('info_dict', {}), res_dict) + if 'playlist_mincount' in test_case: + self.assertGreaterEqual( + len(res_dict['entries']), + test_case['playlist_mincount'], + 'Expected at least %d in playlist %s, but got only %d' % ( + test_case['playlist_mincount'], test_case['url'], + len(res_dict['entries']))) + for tc in test_cases: tc_filename = get_tc_filename(tc) if not test_case.get('params', {}).get('skip_download', False): diff --git a/test/test_playlists.py b/test/test_playlists.py index c221c47b9..6448fea38 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -1,6 +1,17 @@ #!/usr/bin/env python # encoding: utf-8 +## DEPRECATED FILE! +# Add new tests to the extractors themselves, like this: +# _TEST = { +# 'url': 'http://example.com/playlist/42', +# 'playlist_mincount': 99, +# 'info_dict': { +# 'id': '42', +# 'title': 'Playlist number forty-two', +# } +# } + from __future__ import unicode_literals # Allow direct execution @@ -193,10 +204,10 @@ class TestPlaylists(unittest.TestCase): def test_bandcamp_album(self): dl = FakeYDL() ie = BandcampAlbumIE(dl) - result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep') + result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave') self.assertIsPlaylist(result) - self.assertEqual(result['title'], 'Nightmare Night EP') - assertGreaterEqual(self, len(result['entries']), 4) + self.assertEqual(result['title'], 'Hierophany of the Open Grave') + assertGreaterEqual(self, len(result['entries']), 9) def test_smotri_community(self): dl = FakeYDL() diff --git a/test/test_utils.py b/test/test_utils.py index 51eb0b6b9..e26cc5b0c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase): d = json.loads(stripped) self.assertEqual(d, [{"id": "532cb", "x": 3}]) - def test_uppercase_escpae(self): + def test_uppercase_escape(self): self.assertEqual(uppercase_escape(u'aä'), u'aä') self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐') diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index f0f33f1db..604e76ab6 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +from __future__ import unicode_literals + # Allow direct execution import os import sys @@ -16,52 +18,64 @@ from youtube_dl.utils import compat_str, compat_urlretrieve _TESTS = [ ( - u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', - u'js', + 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', + 'js', 86, - u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', + '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', ), ( - u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', - u'js', + 'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', + 'js', 85, - u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', + '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', ), ( - u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js', - u'js', + 'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js', + 'js', 90, - u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', + ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', ), ( - u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', - u'js', + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', + 'js', 84, - u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', + 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', ), ( - u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', - u'js', - u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', - u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', + 'js', + '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', + 'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', ), ( - u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf', - u'swf', + 'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf', + 'swf', 86, - u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?' + 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?' ), ( - u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf', - u'swf', - u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9', - u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F' + 'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf', + 'swf', + 'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9', + '9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F' ), ( - u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', - u'js', + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', + 'js', 84, - u'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>' + '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>' + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js', + 'js', + 83, + '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F' + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js', + 'js', + '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', + '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' ) ] @@ -75,7 +89,7 @@ class TestSignature(unittest.TestCase): def make_tfunc(url, stype, sig_input, expected_sig): - m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url) + m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url) assert m, '%r should follow URL format' % url test_id = m.group(1) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4ff1ae0e8..e7194f3e3 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -162,6 +162,7 @@ class YoutubeDL(object): default_search: Prepend this string if an input url is not valid. 'auto' for elaborate guessing encoding: Use this encoding instead of the system-specified. + extract_flat: Do not resolve URLs, return the immediate result. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -275,7 +276,7 @@ class YoutubeDL(object): return message assert hasattr(self, '_output_process') - assert type(message) == type('') + assert isinstance(message, compat_str) line_count = message.count('\n') + 1 self._output_process.stdin.write((message + '\n').encode('utf-8')) self._output_process.stdin.flush() @@ -303,7 +304,7 @@ class YoutubeDL(object): def to_stderr(self, message): """Print message to stderr.""" - assert type(message) == type('') + assert isinstance(message, compat_str) if self.params.get('logger'): self.params['logger'].error(message) else: @@ -558,7 +559,12 @@ class YoutubeDL(object): Returns the resolved ie_result. """ - result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system + result_type = ie_result.get('_type', 'video') + + if self.params.get('extract_flat', False): + if result_type in ('url', 'url_transparent'): + return ie_result + if result_type == 'video': self.add_extra_info(ie_result, extra_info) return self.process_video_result(ie_result, download=download) @@ -849,7 +855,7 @@ class YoutubeDL(object): # Keep for backwards compatibility info_dict['stitle'] = info_dict['title'] - if not 'format' in info_dict: + if 'format' not in info_dict: info_dict['format'] = info_dict['ext'] reason = self._match_entry(info_dict) @@ -1234,21 +1240,18 @@ class YoutubeDL(object): if not self.params.get('verbose'): return + if type('') is not compat_str: + # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326) + self.report_warning( + 'Your Python is broken! Update to a newer and supported version') + encoding_str = ( '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % ( locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding())) - try: - write_string(encoding_str, encoding=None) - except: - errmsg = 'Failed to write encoding string %r' % encoding_str - try: - sys.stdout.write(errmsg) - except: - pass - raise IOError(errmsg) + write_string(encoding_str, encoding=None) self._write_string('[debug] youtube-dl version ' + __version__ + '\n') try: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index de7bc0f5f..f156ba3a0 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -66,6 +66,11 @@ __authors__ = ( 'Naglis Jonaitis', 'Charles Chen', 'Hassaan Ali', + 'Dobrosław Żybort', + 'David Fabijan', + 'Sebastian Haas', + 'Alexander Kirk', + 'Erik Johnson', ) __license__ = 'Public Domain' @@ -76,6 +81,7 @@ import optparse import os import random import shlex +import shutil import sys @@ -222,6 +228,7 @@ def parseOpts(overrideArguments=None): downloader = optparse.OptionGroup(parser, 'Download Options') postproc = optparse.OptionGroup(parser, 'Post-processing Options') filesystem = optparse.OptionGroup(parser, 'Filesystem Options') + workarounds = optparse.OptionGroup(parser, 'Workarounds') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') general.add_option('-h', '--help', @@ -238,14 +245,6 @@ def parseOpts(overrideArguments=None): general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) - general.add_option('--user-agent', - dest='user_agent', help='specify a custom user agent', metavar='UA') - general.add_option('--referer', - dest='referer', help='specify a custom referer, use if the video access is restricted to one domain', - metavar='REF', default=None) - general.add_option('--add-header', - dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append", - metavar='FIELD:VALUE') general.add_option('--list-extractors', action='store_true', dest='list_extractors', help='List all supported extractors and the URLs they would handle', default=False) @@ -255,33 +254,17 @@ def parseOpts(overrideArguments=None): general.add_option( '--proxy', dest='proxy', default=None, metavar='URL', help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') - general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') - general.add_option( - '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', - help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') - general.add_option( - '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', - help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') - general.add_option( - '--no-cache-dir', action='store_const', const=None, dest='cachedir', - help='Disable filesystem caching') general.add_option( '--socket-timeout', dest='socket_timeout', type=float, default=None, help=u'Time to wait before giving up, in seconds') - general.add_option( - '--bidi-workaround', dest='bidi_workaround', action='store_true', - help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') general.add_option( '--default-search', dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.') + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') general.add_option( '--ignore-config', action='store_true', help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') - general.add_option( - '--encoding', dest='encoding', metavar='ENCODING', - help='Force the specified encoding (experimental)') selection.add_option( '--playlist-start', @@ -382,6 +365,33 @@ def parseOpts(overrideArguments=None): help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) + workarounds.add_option( + '--encoding', dest='encoding', metavar='ENCODING', + help='Force the specified encoding (experimental)') + workarounds.add_option( + '--no-check-certificate', action='store_true', + dest='no_check_certificate', default=False, + help='Suppress HTTPS certificate validation.') + workarounds.add_option( + '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', + help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') + workarounds.add_option( + '--user-agent', metavar='UA', + dest='user_agent', help='specify a custom user agent') + workarounds.add_option( + '--referer', metavar='REF', + dest='referer', default=None, + help='specify a custom referer, use if the video access is restricted to one domain', + ) + workarounds.add_option( + '--add-header', metavar='FIELD:VALUE', + dest='headers', action='append', + help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', + ) + workarounds.add_option( + '--bidi-workaround', dest='bidi_workaround', action='store_true', + help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') + verbosity.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode', default=False) verbosity.add_option( @@ -439,12 +449,10 @@ def parseOpts(overrideArguments=None): help='Display sent and read HTTP traffic') - filesystem.add_option('-t', '--title', - action='store_true', dest='usetitle', help='use title in file name (default)', default=False) + filesystem.add_option('-a', '--batch-file', + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('--id', action='store_true', dest='useid', help='use only video ID in file name', default=False) - filesystem.add_option('-l', '--literal', - action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) filesystem.add_option('-A', '--auto-number', action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) @@ -470,11 +478,10 @@ def parseOpts(overrideArguments=None): filesystem.add_option('--restrict-filenames', action='store_true', dest='restrictfilenames', help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) - filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') - filesystem.add_option('--load-info', - dest='load_info_filename', metavar='FILE', - help='json file containing the video information (created with the "--write-json" option)') + filesystem.add_option('-t', '--title', + action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False) + filesystem.add_option('-l', '--literal', + action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) filesystem.add_option('-w', '--no-overwrites', action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) filesystem.add_option('-c', '--continue', @@ -482,8 +489,6 @@ def parseOpts(overrideArguments=None): filesystem.add_option('--no-continue', action='store_false', dest='continue_dl', help='do not resume partially downloaded files (restart from beginning)') - filesystem.add_option('--cookies', - dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') filesystem.add_option('--no-part', action='store_true', dest='nopart', help='do not use .part files', default=False) filesystem.add_option('--no-mtime', @@ -501,6 +506,20 @@ def parseOpts(overrideArguments=None): filesystem.add_option('--write-thumbnail', action='store_true', dest='writethumbnail', help='write thumbnail image to disk', default=False) + filesystem.add_option('--load-info', + dest='load_info_filename', metavar='FILE', + help='json file containing the video information (created with the "--write-json" option)') + filesystem.add_option('--cookies', + dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') + filesystem.add_option( + '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', + help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') + filesystem.add_option( + '--no-cache-dir', action='store_const', const=None, dest='cachedir', + help='Disable filesystem caching') + filesystem.add_option( + '--rm-cache-dir', action='store_true', dest='rm_cachedir', + help='Delete all filesystem cache files') postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, @@ -534,6 +553,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(downloader) parser.add_option_group(filesystem) parser.add_option_group(verbosity) + parser.add_option_group(workarounds) parser.add_option_group(video_format) parser.add_option_group(subtitles) parser.add_option_group(authentication) @@ -694,7 +714,7 @@ def _real_main(argv=None): date = DateRange.day(opts.date) else: date = DateRange(opts.dateafter, opts.datebefore) - if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search: + if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search: parser.error(u'--default-search invalid; did you forget a colon (:) at the end?') # Do not download videos when there are audio-only formats @@ -833,9 +853,26 @@ def _real_main(argv=None): if opts.update_self: update_self(ydl.to_screen, opts.verbose) + # Remove cache dir + if opts.rm_cachedir: + if opts.cachedir is None: + ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)') + else: + if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir): + ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir') + retcode = 141 + else: + ydl.to_screen( + u'Removing cache dir %s .' % opts.cachedir, + skip_eol=True) + if os.path.exists(opts.cachedir): + ydl.to_screen(u'.', skip_eol=True) + shutil.rmtree(opts.cachedir) + ydl.to_screen(u'.') + # Maybe do nothing if (len(all_urls) < 1) and (opts.load_info_filename is None): - if not opts.update_self: + if not (opts.update_self or opts.rm_cachedir): parser.error(u'you must provide at least one URL') else: sys.exit() diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 917f3450e..9ce97f5fe 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -292,7 +292,7 @@ class FileDownloader(object): def real_download(self, filename, info_dict): """Real download process. Redefine in subclasses.""" - raise NotImplementedError(u'This method must be implemented by sublcasses') + raise NotImplementedError(u'This method must be implemented by subclasses') def _hook_progress(self, status): for ph in self._progress_hooks: diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index e6be6ae6c..71353f607 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -220,6 +220,7 @@ class F4mFD(FileDownloader): def real_download(self, filename, info_dict): man_url = info_dict['url'] + requested_bitrate = info_dict.get('tbr') self.to_screen('[download] Downloading f4m manifest') manifest = self.ydl.urlopen(man_url).read() self.report_destination(filename) @@ -233,8 +234,14 @@ class F4mFD(FileDownloader): doc = etree.fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))] - formats = sorted(formats, key=lambda f: f[0]) - rate, media = formats[-1] + if requested_bitrate is None: + # get the best format + formats = sorted(formats, key=lambda f: f[0]) + rate, media = formats[-1] + else: + rate, media = list(filter( + lambda f: int(f[0]) == requested_bitrate, formats))[0] + base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text) metadata = base64.b64decode(media.find(_add_ns('metadata')).text) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8d63d9281..9be1d2e0f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -1,3 +1,4 @@ +from .abc import ABCIE from .academicearth import AcademicEarthCourseIE from .addanime import AddAnimeIE from .adultswim import AdultSwimIE @@ -68,6 +69,7 @@ from .dfb import DFBIE from .dotsub import DotsubIE from .dreisat import DreiSatIE from .drtv import DRTVIE +from .dump import DumpIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE from .divxstage import DivxStageIE @@ -76,6 +78,10 @@ from .ebaumsworld import EbaumsWorldIE from .ehow import EHowIE from .eighttracks import EightTracksIE from .eitb import EitbIE +from .ellentv import ( + EllenTVIE, + EllenTVClipsIE, +) from .elpais import ElPaisIE from .empflix import EmpflixIE from .engadget import EngadgetIE @@ -111,9 +117,11 @@ from .funnyordie import FunnyOrDieIE from .gamekings import GamekingsIE from .gameone import GameOneIE from .gamespot import GameSpotIE +from .gamestar import GameStarIE from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE from .generic import GenericIE +from .godtube import GodTubeIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE @@ -123,6 +131,7 @@ from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE from .hotnewhiphop import HotNewHipHopIE from .howcast import HowcastIE +from .howstuffworks import HowStuffWorksIE from .huffpost import HuffPostIE from .hypem import HypemIE from .iconosquare import IconosquareIE @@ -140,8 +149,10 @@ from .ivi import ( IviIE, IviCompilationIE ) +from .izlesene import IzleseneIE from .jadorecettepub import JadoreCettePubIE from .jeuxvideo import JeuxVideoIE +from .jove import JoveIE from .jukebox import JukeboxIE from .justintv import JustinTVIE from .jpopsukitv import JpopsukiIE @@ -151,6 +162,7 @@ from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE from .keek import KeekIE from .kontrtube import KontrTubeIE +from .krasview import KrasViewIE from .ku6 import Ku6IE from .la7 import LA7IE from .lifenews import LifeNewsIE @@ -172,10 +184,12 @@ from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mit import TechTVMITIE, MITIE, OCWMITIE +from .mitele import MiTeleIE from .mixcloud import MixcloudIE from .mlb import MLBIE from .mpora import MporaIE from .mofosex import MofosexIE +from .mojvideo import MojvideoIE from .mooshare import MooshareIE from .morningstar import MorningstarIE from .motherless import MotherlessIE @@ -219,10 +233,14 @@ from .nrk import ( from .ntv import NTVIE from .nytimes import NYTimesIE from .nuvid import NuvidIE -from .oe1 import OE1IE from .ooyala import OoyalaIE -from .orf import ORFIE +from .orf import ( + ORFTVthekIE, + ORFOE1IE, + ORFFM4IE, +) from .parliamentliveuk import ParliamentLiveUKIE +from .patreon import PatreonIE from .pbs import PBSIE from .photobucket import PhotobucketIE from .playvid import PlayvidIE @@ -242,6 +260,7 @@ from .ro220 import Ro220IE from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtbf import RTBFIE +from .rtlnl import RtlXlIE from .rtlnow import RTLnowIE from .rts import RTSIE from .rtve import RTVEALaCartaIE @@ -258,6 +277,7 @@ from .savefrom import SaveFromIE from .scivee import SciVeeIE from .screencast import ScreencastIE from .servingsys import ServingSysIE +from .shared import SharedIE from .sina import SinaIE from .slideshare import SlideshareIE from .slutload import SlutloadIE @@ -320,6 +340,8 @@ from .tumblr import TumblrIE from .tutv import TutvIE from .tvigle import TvigleIE from .tvp import TvpIE +from .tvplay import TVPlayIE +from .ubu import UbuIE from .udemy import ( UdemyIE, UdemyCourseIE @@ -341,6 +363,7 @@ from .videofyme import VideofyMeIE from .videopremium import VideoPremiumIE from .videott import VideoTtIE from .videoweed import VideoWeedIE +from .vidme import VidmeIE from .vimeo import ( VimeoIE, VimeoChannelIE, @@ -374,6 +397,7 @@ from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE from .wrzuta import WrzutaIE from .xbef import XBefIE +from .xboxclips import XboxClipsIE from .xhamster import XHamsterIE from .xnxx import XNXXIE from .xvideos import XVideosIE diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py new file mode 100644 index 000000000..7d89f44ee --- /dev/null +++ b/youtube_dl/extractor/abc.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class ABCIE(InfoExtractor): + IE_NAME = 'abc.net.au' + _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P\d+)' + + _TEST = { + 'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716', + 'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742', + 'info_dict': { + 'id': '5624716', + 'ext': 'mp4', + 'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor', + 'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + urls_info_json = self._search_regex( + r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls', + flags=re.DOTALL) + urls_info = json.loads(urls_info_json.replace('\'', '"')) + formats = [{ + 'url': url_info['url'], + 'width': int(url_info['width']), + 'height': int(url_info['height']), + 'tbr': int(url_info['bitrate']), + 'filesize': int(url_info['filesize']), + } for url_info in urls_info] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 7e93bc4df..748608826 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -1,5 +1,7 @@ #coding: utf-8 +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -13,13 +15,14 @@ class AparatIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P[a-zA-Z0-9]+)' _TEST = { - u'url': u'http://www.aparat.com/v/wP8On', - u'file': u'wP8On.mp4', - u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1', - u'info_dict': { - u"title": u"تیم گلکسی 11 - زومیت", + 'url': 'http://www.aparat.com/v/wP8On', + 'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1', + 'info_dict': { + 'id': 'wP8On', + 'ext': 'mp4', + 'title': 'تیم گلکسی 11 - زومیت', }, - #u'skip': u'Extremely unreliable', + # 'skip': 'Extremely unreliable', } def _real_extract(self, url): @@ -29,8 +32,8 @@ class AparatIE(InfoExtractor): # Note: There is an easier-to-parse configuration at # http://www.aparat.com/video/video/config/videohash/%video_id # but the URL in there does not work - embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' + - video_id + u'/vt/frame') + embed_url = ('http://www.aparat.com/video/video/embed/videohash/' + + video_id + '/vt/frame') webpage = self._download_webpage(embed_url, video_id) video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index dc8657b67..4359b88d1 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -6,6 +6,7 @@ import json from .common import InfoExtractor from ..utils import ( compat_urlparse, + int_or_none, ) @@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor): formats.append({ 'url': format_url, 'format': format['type'], - 'width': format['width'], - 'height': int(format['height']), + 'width': int_or_none(format['width']), + 'height': int_or_none(format['height']), }) self._sort_formats(formats) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 30a85c8c1..7f0da8ab6 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -8,6 +8,8 @@ from ..utils import ( determine_ext, ExtractorError, qualities, + compat_urllib_parse_urlparse, + compat_urllib_parse, ) @@ -44,8 +46,14 @@ class ARDIE(InfoExtractor): else: video_id = m.group('video_id') + urlp = compat_urllib_parse_urlparse(url) + url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl() + webpage = self._download_webpage(url, video_id) + if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage: + raise ExtractorError('Video %s is no longer available' % video_id, expected=True) + title = self._html_search_regex( [r'(.*?)', r'', diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 9591bad8a..d86dbba8e 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor): regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] return any(re.match(r, f['versionCode']) for r in regexes) # Some formats may not be in the same language as the url + # TODO: Might want not to drop videos that does not match requested language + # but to process those formats with lower precedence formats = filter(_match_lang, all_formats) - formats = list(formats) # in python3 filter returns an iterator + formats = list(formats) # in python3 filter returns an iterator if not formats: # Some videos are only available in the 'Originalversion' # they aren't tagged as being in French or German - if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats): - formats = all_formats - else: - raise ExtractorError(u'The formats list is empty') + # Sometimes there are neither videos of requested lang code + # nor original version videos available + # For such cases we just take all_formats as is + formats = all_formats + if not formats: + raise ExtractorError('The formats list is empty') if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: def sort_key(f): diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 7d558e262..3e461e715 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -52,7 +52,7 @@ class BlinkxIE(InfoExtractor): 'height': int(m['h']), }) elif m['type'] == 'original': - duration = m['d'] + duration = float(m['d']) elif m['type'] == 'youtube': yt_id = m['link'] self.to_screen('Youtube video detected: %s' % yt_id) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 25fb79e14..c51a97ce4 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor): _TEST = { 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', - 'md5': '7bf08858ff7c203c870e8a6190e221e5', + # The md5 checksum changes 'info_dict': { 'id': 'qurhIVlJSB6hzkVi229d8g', 'ext': 'flv', @@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor): return { 'id': name.split('-')[-1], 'title': title, - 'url': f4m_url, - 'ext': 'flv', + 'formats': self._extract_f4m_formats(f4m_url, name), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), } diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index f7f2f713a..86f0c2861 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + parse_duration, ) @@ -22,8 +23,9 @@ class BRIE(InfoExtractor): 'info_dict': { 'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a', 'ext': 'mp4', - 'title': 'Am 1. und 2. August in Oberammergau', - 'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021', + 'title': 'Wenn das Traditions-Theater wackelt', + 'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt', + 'duration': 34, } }, { @@ -34,6 +36,7 @@ class BRIE(InfoExtractor): 'ext': 'mp4', 'title': 'Über den Pass', 'description': 'Die Eroberung der Alpen: Über den Pass', + 'duration': 2588, } }, { @@ -44,6 +47,7 @@ class BRIE(InfoExtractor): 'ext': 'aac', 'title': '"Keine neuen Schulden im nächsten Jahr"', 'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"', + 'duration': 64, } }, { @@ -54,6 +58,7 @@ class BRIE(InfoExtractor): 'ext': 'mp4', 'title': 'Umweltbewusster Häuslebauer', 'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer', + 'duration': 116, } }, { @@ -64,6 +69,7 @@ class BRIE(InfoExtractor): 'ext': 'mp4', 'title': 'Folge 1 - Metaphysik', 'description': 'Kant für Anfänger: Folge 1 - Metaphysik', + 'duration': 893, 'uploader': 'Eva Maria Steimle', 'upload_date': '20140117', } @@ -84,6 +90,7 @@ class BRIE(InfoExtractor): media = { 'id': xml_media.get('externalId'), 'title': xml_media.find('title').text, + 'duration': parse_duration(xml_media.find('duration').text), 'formats': self._extract_formats(xml_media.find('assets')), 'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')), 'description': ' '.join(xml_media.find('shareTitle').text.splitlines()), diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 88f12797c..2e6eeac08 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -18,6 +18,7 @@ from ..utils import ( clean_html, compiled_regex_type, ExtractorError, + int_or_none, RegexNotFoundError, sanitize_filename, unescapeHTML, @@ -373,7 +374,8 @@ class InfoExtractor(object): else: for p in pattern: mobj = re.search(p, string, flags) - if mobj: break + if mobj: + break if os.name != 'nt' and sys.stderr.isatty(): _name = u'\033[0;34m%s\033[0m' % name @@ -461,8 +463,9 @@ class InfoExtractor(object): return self._og_search_property('title', html, **kargs) def _og_search_video_url(self, html, name='video url', secure=True, **kargs): - regexes = self._og_regexes('video') - if secure: regexes = self._og_regexes('video:secure_url') + regexes + regexes = self._og_regexes('video') + self._og_regexes('video:url') + if secure: + regexes = self._og_regexes('video:secure_url') + regexes return self._html_search_regex(regexes, html, name, **kargs) def _og_search_url(self, html, **kargs): @@ -589,6 +592,24 @@ class InfoExtractor(object): self.to_screen(msg) time.sleep(timeout) + def _extract_f4m_formats(self, manifest_url, video_id): + manifest = self._download_xml( + manifest_url, video_id, 'Downloading f4m manifest', + 'Unable to download f4m manifest') + + formats = [] + for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'): + formats.append({ + 'url': manifest_url, + 'ext': 'flv', + 'tbr': int_or_none(media_el.attrib.get('bitrate')), + 'width': int_or_none(media_el.attrib.get('width')), + 'height': int_or_none(media_el.attrib.get('height')), + }) + self._sort_formats(formats) + + return formats + class SearchInfoExtractor(InfoExtractor): """ diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py index cb8e06822..8049779b0 100644 --- a/youtube_dl/extractor/dfb.py +++ b/youtube_dl/extractor/dfb.py @@ -30,7 +30,7 @@ class DFBIE(InfoExtractor): video_id) video_info = player_info.find('video') - f4m_info = self._download_xml(video_info.find('url').text, video_id) + f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id) token_el = f4m_info.find('token') manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' diff --git a/youtube_dl/extractor/dump.py b/youtube_dl/extractor/dump.py new file mode 100644 index 000000000..6b651778a --- /dev/null +++ b/youtube_dl/extractor/dump.py @@ -0,0 +1,39 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class DumpIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?dump\.com/(?P[a-zA-Z0-9]+)/' + + _TEST = { + 'url': 'http://www.dump.com/oneus/', + 'md5': 'ad71704d1e67dfd9e81e3e8b42d69d99', + 'info_dict': { + 'id': 'oneus', + 'ext': 'flv', + 'title': "He's one of us.", + 'thumbnail': 're:^https?://.*\.jpg$', + }, + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + webpage = self._download_webpage(url, video_id) + video_url = self._search_regex( + r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL') + + thumb = self._og_search_thumbnail(webpage) + title = self._search_regex(r'([^"]+)', webpage, 'title') + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'thumbnail': thumb, + } diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py new file mode 100644 index 000000000..3e7923648 --- /dev/null +++ b/youtube_dl/extractor/ellentv.py @@ -0,0 +1,79 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + parse_iso8601, +) + + +class EllenTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P[a-z0-9_-]+)' + _TEST = { + 'url': 'http://www.ellentv.com/videos/0-7jqrsr18/', + 'md5': 'e4af06f3bf0d5f471921a18db5764642', + 'info_dict': { + 'id': '0-7jqrsr18', + 'ext': 'mp4', + 'title': 'What\'s Wrong with These Photos? A Whole Lot', + 'timestamp': 1406876400, + 'upload_date': '20140801', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + timestamp = parse_iso8601(self._search_regex( + r'