Merge remote-tracking branch '5moufl/behindkink'
author Philipp Hagemeister <phihag@phihag.de>
Mon, 15 Sep 2014 21:36:21 +0000 (23:36 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 15 Sep 2014 21:36:21 +0000 (23:36 +0200)
47 files changed:
.gitignore
LATEST_VERSION [deleted file]
MANIFEST.in
Makefile
README.md
devscripts/fish-completion.in [new file with mode: 0644]
devscripts/fish-completion.py [new file with mode: 0755]
devscripts/release.sh
setup.py
test/test_utils.py
test/test_youtube_lists.py
youtube-dl [deleted file]
youtube-dl.exe [deleted file]
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/adultswim.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/br.py
youtube_dl/extractor/chilloutzone.py
youtube_dl/extractor/cloudy.py [new file with mode: 0644]
youtube_dl/extractor/daum.py
youtube_dl/extractor/deezer.py [new file with mode: 0644]
youtube_dl/extractor/drtv.py
youtube_dl/extractor/einthusan.py [new file with mode: 0644]
youtube_dl/extractor/facebook.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/hostingbulk.py [new file with mode: 0644]
youtube_dl/extractor/izlesene.py
youtube_dl/extractor/nosvideo.py
youtube_dl/extractor/pornhd.py
youtube_dl/extractor/pornoxo.py [new file with mode: 0644]
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/spiegel.py
youtube_dl/extractor/swrmediathek.py
youtube_dl/extractor/telemb.py [new file with mode: 0644]
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/turbo.py [new file with mode: 0644]
youtube_dl/extractor/tvplay.py
youtube_dl/extractor/vporn.py
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/youjizz.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/options.py [new file with mode: 0644]
youtube_dl/utils.py
youtube_dl/version.py

index b8128fab17f0599c5aac3fd1313d8caf32cf535b..e44977ca36ed367c009fea0144f50d3d1893d082 100644 (file)
@@ -11,6 +11,7 @@ MANIFEST
 README.txt
 youtube-dl.1
 youtube-dl.bash-completion
+youtube-dl.fish
 youtube-dl
 youtube-dl.exe
 youtube-dl.tar.gz
diff --git a/LATEST_VERSION b/LATEST_VERSION
deleted file mode 100644 (file)
index a334573..0000000
+++ /dev/null
@@ -1 +0,0 @@
-2012.12.99
index d43cc1f3ba95e2ec16728320b5dd64b8a3558abb..5743f605a2ab4e93e76416732f6e42b252e87150 100644 (file)
@@ -2,5 +2,6 @@ include README.md
 include test/*.py
 include test/*.json
 include youtube-dl.bash-completion
+include youtube-dl.fish
 include youtube-dl.1
 recursive-include docs Makefile conf.py *.rst
index 088a9320bddfd367babd928bc96c71f3eaa4d9de..6272b826ce0bc86749948684c81f8436f29c7b9b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
-all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
+all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish
 
 clean:
-       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
+       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.fish
 
 cleanall: clean
        rm -f youtube-dl youtube-dl.exe
@@ -29,6 +29,8 @@ install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
        install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
        install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
        install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
+       install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
+       install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
 
 test:
        #nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
@@ -36,9 +38,9 @@ test:
 
 tar: youtube-dl.tar.gz
 
-.PHONY: all clean install test tar bash-completion pypi-files
+.PHONY: all clean install test tar bash-completion pypi-files fish-completion
 
-pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1
+pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
 
 youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
        zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
@@ -64,7 +66,12 @@ youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-co
 
 bash-completion: youtube-dl.bash-completion
 
-youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
+youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
+       python devscripts/fish-completion.py
+
+fish-completion: youtube-dl.fish
+
+youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish
        @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
                --exclude '*.DS_Store' \
                --exclude '*.kate-swp' \
@@ -78,5 +85,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
                -- \
                bin devscripts test youtube_dl docs \
                LICENSE README.md README.txt \
-               Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
+               Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
+               youtube-dl.fish setup.py \
                youtube-dl
index ca366039e4515f2095e15c260a4bd4fa65d22e87..5cc959ac54d33de813222558d6c44ce06a5e3391 100644 (file)
--- a/README.md
+++ b/README.md
@@ -345,6 +345,25 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
 
 # FAQ
 
+### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
+
+YouTube changed their playlist format in March 2014 and again later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
+
+If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to report bugs to the Ubuntu packaging guys - all they have to do is update the package to a somewhat recent version.
+
+Alternatively, uninstall the youtube-dl package and follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html). In a pinch, this should do if you used `apt-get` before to install youtube-dl:
+
+```
+sudo apt-get remove -y youtube-dl
+sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
+sudo chmod a+x /usr/local/bin/youtube-dl
+hash -r
+```
+
+### Do I always have to pass in `--max-quality FORMAT`, or `-citw`?
+
+By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`.
+
 ### Can you please put the -b option back?
 
 Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it.
diff --git a/devscripts/fish-completion.in b/devscripts/fish-completion.in
new file mode 100644 (file)
index 0000000..eb79765
--- /dev/null
@@ -0,0 +1,5 @@
+
+{{commands}}
+
+
+complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
new file mode 100755 (executable)
index 0000000..f4aaf02
--- /dev/null
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import optparse
+import os
+from os.path import dirname as dirn
+import sys
+
+sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
+import youtube_dl
+from youtube_dl.utils import shell_quote
+
+FISH_COMPLETION_FILE = 'youtube-dl.fish'
+FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
+
+EXTRA_ARGS = {
+    'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'],
+
+    # Options that need a file parameter
+    'download-archive': ['--require-parameter'],
+    'cookies': ['--require-parameter'],
+    'load-info': ['--require-parameter'],
+    'batch-file': ['--require-parameter'],
+}
+
+def build_completion(opt_parser):
+    commands = []
+
+    for group in opt_parser.option_groups:
+        for option in group.option_list:
+            long_option = option.get_opt_string().strip('-')
+            help_msg = shell_quote([option.help])
+            complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
+            if option._short_opts:
+                complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
+            if option.help != optparse.SUPPRESS_HELP:
+                complete_cmd += ['--description', option.help]
+            complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
+            commands.append(shell_quote(complete_cmd))
+
+    with open(FISH_COMPLETION_TEMPLATE) as f:
+        template = f.read()
+    filled_template = template.replace('{{commands}}', '\n'.join(commands))
+    with open(FISH_COMPLETION_FILE, 'w') as f:
+        f.write(filled_template)
+
+parser = youtube_dl.parseOpts()[0]
+build_completion(parser)
index 453087e5f70fa92906926ef12ab3b192087c51c3..691517ceb9b34394115ed4e54521bad1d4f3b54b 100755 (executable)
@@ -73,7 +73,6 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 (cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
 (cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
 (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
-git checkout HEAD -- youtube-dl youtube-dl.exe
 
 /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
 for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
index 03e7b358e4ec1b4800e06f6796e386a808b67891..cf6b92b0f7e61b504dfdc16b6b04568fd073982b 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -48,6 +48,7 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
 else:
     files_spec = [
         ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
+        ('etc/fish/completions', ['youtube-dl.fish']),
         ('share/doc/youtube_dl', ['README.txt']),
         ('share/man/man1', ['youtube-dl.1'])
     ]
index 8d89979775c75db9f54c041716f5bfc92a77150a..3efbed29dd34de570f2db4e6eb4954ec2f4b9c6e 100644 (file)
@@ -40,6 +40,9 @@ from youtube_dl.utils import (
     parse_iso8601,
     strip_jsonp,
     uppercase_escape,
+    limit_length,
+    escape_rfc3986,
+    escape_url,
 )
 
 
@@ -286,5 +289,41 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(uppercase_escape('aä'), 'aä')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
 
+    def test_limit_length(self):
+        self.assertEqual(limit_length(None, 12), None)
+        self.assertEqual(limit_length('foo', 12), 'foo')
+        self.assertTrue(
+            limit_length('foo bar baz asd', 12).startswith('foo bar'))
+        self.assertTrue('...' in limit_length('foo bar baz asd', 12))
+
+    def test_escape_rfc3986(self):
+        reserved = "!*'();:@&=+$,/?#[]"
+        unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
+        self.assertEqual(escape_rfc3986(reserved), reserved)
+        self.assertEqual(escape_rfc3986(unreserved), unreserved)
+        self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82')
+        self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82')
+        self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
+        self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')
+
+    def test_escape_url(self):
+        self.assertEqual(
+            escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
+            'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
+        )
+        self.assertEqual(
+            escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
+            'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
+        )
+        self.assertEqual(
+            escape_url('http://тест.рф/фрагмент'),
+            'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
+        )
+        self.assertEqual(
+            escape_url('http://тест.рф/абв?абв=абв#абв'),
+            'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
+        )
+        self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
+
 if __name__ == '__main__':
     unittest.main()
index 3aadedd64cf5af38ab1d18b640b10301c2073de2..1fa99f88b595644df5c4ed50e4c134cc15668638 100644 (file)
@@ -25,15 +25,6 @@ class TestYoutubeLists(unittest.TestCase):
         """Make sure the info has '_type' set to 'playlist'"""
         self.assertEqual(info['_type'], 'playlist')
 
-    def test_youtube_playlist(self):
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'ytdl test PL')
-        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
-        self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
-
     def test_youtube_playlist_noplaylist(self):
         dl = FakeYDL()
         dl.params['noplaylist'] = True
@@ -41,36 +32,7 @@ class TestYoutubeLists(unittest.TestCase):
         result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         self.assertEqual(result['_type'], 'url')
         self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
-
-    def test_issue_673(self):
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLBB231211A4F62143')
-        self.assertTrue(len(result['entries']) > 25)
-
-    def test_youtube_playlist_long(self):
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
-        self.assertIsPlaylist(result)
-        self.assertTrue(len(result['entries']) >= 799)
-
-    def test_youtube_playlist_with_deleted(self):
-        #651
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
-        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
-        self.assertFalse('pElCt5oNDuI' in ytie_results)
-        self.assertFalse('KdPEApIVdWM' in ytie_results)
-        
-    def test_youtube_playlist_empty(self):
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
-        self.assertIsPlaylist(result)
-        self.assertEqual(len(result['entries']), 0)
-
+    
     def test_youtube_course(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
@@ -97,12 +59,6 @@ class TestYoutubeLists(unittest.TestCase):
         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
         self.assertTrue(len(result['entries']) >= 320)
 
-    def test_youtube_safe_search(self):
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
-        self.assertEqual(len(result['entries']), 2)
-
     def test_youtube_show(self):
         dl = FakeYDL()
         ie = YoutubeShowIE(dl)
diff --git a/youtube-dl b/youtube-dl
deleted file mode 100755 (executable)
index e3eb877..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/env python
-
-import sys, os
-import json, hashlib
-
-try:
-    import urllib.request as compat_urllib_request
-except ImportError: # Python 2
-    import urllib2 as compat_urllib_request
-
-def rsa_verify(message, signature, key):
-    from struct import pack
-    from hashlib import sha256
-    from sys import version_info
-    def b(x):
-        if version_info[0] == 2: return x
-        else: return x.encode('latin1')
-    assert(type(message) == type(b('')))
-    block_size = 0
-    n = key[0]
-    while n:
-        block_size += 1
-        n >>= 8
-    signature = pow(int(signature, 16), key[1], key[0])
-    raw_bytes = []
-    while signature:
-        raw_bytes.insert(0, pack("B", signature & 0xFF))
-        signature >>= 8
-    signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
-    if signature[0:2] != b('\x00\x01'): return False
-    signature = signature[2:]
-    if not b('\x00') in signature: return False
-    signature = signature[signature.index(b('\x00'))+1:]
-    if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
-    signature = signature[19:]
-    if signature != sha256(message).digest(): return False
-    return True
-
-sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
-sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
-sys.stderr.write(u'From now on, get the binaries from http://rg3.github.io/youtube-dl/download.html, not from the git repository.\n\n')
-
-try:
-       raw_input()
-except NameError: # Python 3
-       input()
-
-filename = sys.argv[0]
-
-UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
-VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
-JSON_URL = UPDATE_URL + 'versions.json'
-UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
-
-if not os.access(filename, os.W_OK):
-    sys.exit('ERROR: no write permissions on %s' % filename)
-
-try:
-    versions_info = compat_urllib_request.urlopen(JSON_URL).read().decode('utf-8')
-    versions_info = json.loads(versions_info)
-except:
-    sys.exit(u'ERROR: can\'t obtain versions info. Please try again later.')
-if not 'signature' in versions_info:
-    sys.exit(u'ERROR: the versions file is not signed or corrupted. Aborting.')
-signature = versions_info['signature']
-del versions_info['signature']
-if not rsa_verify(json.dumps(versions_info, sort_keys=True).encode('utf-8'), signature, UPDATES_RSA_KEY):
-    sys.exit(u'ERROR: the versions file signature is invalid. Aborting.')
-
-version = versions_info['versions'][versions_info['latest']]
-
-try:
-    urlh = compat_urllib_request.urlopen(version['bin'][0])
-    newcontent = urlh.read()
-    urlh.close()
-except (IOError, OSError) as err:
-    sys.exit('ERROR: unable to download latest version')
-
-newcontent_hash = hashlib.sha256(newcontent).hexdigest()
-if newcontent_hash != version['bin'][1]:
-    sys.exit(u'ERROR: the downloaded file hash does not match. Aborting.')
-
-try:
-    with open(filename, 'wb') as outf:
-        outf.write(newcontent)
-except (IOError, OSError) as err:
-    sys.exit('ERROR: unable to overwrite current version')
-
-sys.stderr.write(u'Done! Now you can run youtube-dl.\n')
diff --git a/youtube-dl.exe b/youtube-dl.exe
deleted file mode 100644 (file)
index 45eee04..0000000
Binary files a/youtube-dl.exe and /dev/null differ
index 553bf559b3b2e7b155b2e14b2f2f49eda91ba9e9..9519594c9ad2dab36e1d66fe2f016d7edc6e798d 100755 (executable)
@@ -28,6 +28,7 @@ from .utils import (
     compat_str,
     compat_urllib_error,
     compat_urllib_request,
+    escape_url,
     ContentTooShortError,
     date_from_str,
     DateRange,
@@ -1241,6 +1242,25 @@ class YoutubeDL(object):
 
     def urlopen(self, req):
         """ Start an HTTP download """
+
+        # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
+        # always respected by websites: some give out URLs with non-ASCII characters that
+        # are not percent-encoded (see telemb.py, ard.py [#3412]).
+        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991).
+        # To work around the aforementioned issue, we replace the request's original URL with
+        # a percent-encoded one.
+        url = req if isinstance(req, compat_str) else req.get_full_url()
+        url_escaped = escape_url(url)
+
+        # Substitute URL if any change after escaping
+        if url != url_escaped:
+            if isinstance(req, compat_str):
+                req = url_escaped
+            else:
+                req = compat_urllib_request.Request(
+                    url_escaped, data=req.data, headers=req.headers,
+                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):
index 8f1a1a2322244314c785026b079182d7fac48022..871ddead9e83ee2de91713b4b1a0828b21d96450 100644 (file)
@@ -75,26 +75,27 @@ __authors__  = (
     'Ole Ernst',
     'Aaron McDaniel (mcd1992)',
     'Magnus Kolstad',
+    'Hari Padmanaban',
 )
 
 __license__ = 'Public Domain'
 
 import codecs
 import io
-import optparse
 import os
 import random
-import shlex
 import sys
 
 
+from .options import (
+    parseOpts,
+)
 from .utils import (
     compat_getpass,
     compat_print,
     DateRange,
     DEFAULT_OUTTMPL,
     decodeOption,
-    get_term_width,
     DownloadError,
     MaxDownloadsReached,
     preferredencoding,
@@ -109,7 +110,6 @@ from .downloader import (
     FileDownloader,
 )
 from .extractor import gen_extractors
-from .version import __version__
 from .YoutubeDL import YoutubeDL
 from .postprocessor import (
     AtomicParsleyPP,
@@ -123,475 +123,6 @@ from .postprocessor import (
 )
 
 
-def parseOpts(overrideArguments=None):
-    def _readOptions(filename_bytes, default=[]):
-        try:
-            optionf = open(filename_bytes)
-        except IOError:
-            return default  # silently skip if file is not present
-        try:
-            res = []
-            for l in optionf:
-                res += shlex.split(l, comments=True)
-        finally:
-            optionf.close()
-        return res
-
-    def _readUserConf():
-        xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
-        if xdg_config_home:
-            userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
-            if not os.path.isfile(userConfFile):
-                userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
-        else:
-            userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
-            if not os.path.isfile(userConfFile):
-                userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
-        userConf = _readOptions(userConfFile, None)
-
-        if userConf is None:
-            appdata_dir = os.environ.get('appdata')
-            if appdata_dir:
-                userConf = _readOptions(
-                    os.path.join(appdata_dir, 'youtube-dl', 'config'),
-                    default=None)
-                if userConf is None:
-                    userConf = _readOptions(
-                        os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
-                        default=None)
-
-        if userConf is None:
-            userConf = _readOptions(
-                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
-                default=None)
-        if userConf is None:
-            userConf = _readOptions(
-                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
-                default=None)
-
-        if userConf is None:
-            userConf = []
-
-        return userConf
-
-    def _format_option_string(option):
-        ''' ('-o', '--option') -> -o, --format METAVAR'''
-
-        opts = []
-
-        if option._short_opts:
-            opts.append(option._short_opts[0])
-        if option._long_opts:
-            opts.append(option._long_opts[0])
-        if len(opts) > 1:
-            opts.insert(1, ', ')
-
-        if option.takes_value(): opts.append(' %s' % option.metavar)
-
-        return "".join(opts)
-
-    def _comma_separated_values_options_callback(option, opt_str, value, parser):
-        setattr(parser.values, option.dest, value.split(','))
-
-    def _hide_login_info(opts):
-        opts = list(opts)
-        for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
-            try:
-                i = opts.index(private_opt)
-                opts[i+1] = '<PRIVATE>'
-            except ValueError:
-                pass
-        return opts
-
-    max_width = 80
-    max_help_position = 80
-
-    # No need to wrap help messages if we're on a wide console
-    columns = get_term_width()
-    if columns: max_width = columns
-
-    fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
-    fmt.format_option_strings = _format_option_string
-
-    kw = {
-        'version'   : __version__,
-        'formatter' : fmt,
-        'usage' : '%prog [options] url [url...]',
-        'conflict_handler' : 'resolve',
-    }
-
-    parser = optparse.OptionParser(**kw)
-
-    # option groups
-    general        = optparse.OptionGroup(parser, 'General Options')
-    selection      = optparse.OptionGroup(parser, 'Video Selection')
-    authentication = optparse.OptionGroup(parser, 'Authentication Options')
-    video_format   = optparse.OptionGroup(parser, 'Video Format Options')
-    subtitles      = optparse.OptionGroup(parser, 'Subtitle Options')
-    downloader     = optparse.OptionGroup(parser, 'Download Options')
-    postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
-    filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
-    workarounds    = optparse.OptionGroup(parser, 'Workarounds')
-    verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
-
-    general.add_option('-h', '--help',
-            action='help', help='print this help text and exit')
-    general.add_option('-v', '--version',
-            action='version', help='print program version and exit')
-    general.add_option('-U', '--update',
-            action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
-    general.add_option('-i', '--ignore-errors',
-            action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
-    general.add_option('--abort-on-error',
-            action='store_false', dest='ignoreerrors',
-            help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
-    general.add_option('--dump-user-agent',
-            action='store_true', dest='dump_user_agent',
-            help='display the current browser identification', default=False)
-    general.add_option('--list-extractors',
-            action='store_true', dest='list_extractors',
-            help='List all supported extractors and the URLs they would handle', default=False)
-    general.add_option('--extractor-descriptions',
-            action='store_true', dest='list_extractor_descriptions',
-            help='Output descriptions of all supported extractors', default=False)
-    general.add_option(
-        '--proxy', dest='proxy', default=None, metavar='URL',
-        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
-    general.add_option(
-        '--socket-timeout', dest='socket_timeout',
-        type=float, default=None, help=u'Time to wait before giving up, in seconds')
-    general.add_option(
-        '--default-search',
-        dest='default_search', metavar='PREFIX',
-        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
-    general.add_option(
-        '--ignore-config',
-        action='store_true',
-        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
-
-    selection.add_option(
-        '--playlist-start',
-        dest='playliststart', metavar='NUMBER', default=1, type=int,
-        help='playlist video to start at (default is %default)')
-    selection.add_option(
-        '--playlist-end',
-        dest='playlistend', metavar='NUMBER', default=None, type=int,
-        help='playlist video to end at (default is last)')
-    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
-    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
-    selection.add_option('--max-downloads', metavar='NUMBER',
-                         dest='max_downloads', type=int, default=None,
-                         help='Abort after downloading NUMBER files')
-    selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
-    selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
-    selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
-    selection.add_option(
-        '--datebefore', metavar='DATE', dest='datebefore', default=None,
-        help='download only videos uploaded on or before this date (i.e. inclusive)')
-    selection.add_option(
-        '--dateafter', metavar='DATE', dest='dateafter', default=None,
-        help='download only videos uploaded on or after this date (i.e. inclusive)')
-    selection.add_option(
-        '--min-views', metavar='COUNT', dest='min_views',
-        default=None, type=int,
-        help="Do not download any videos with less than COUNT views",)
-    selection.add_option(
-        '--max-views', metavar='COUNT', dest='max_views',
-        default=None, type=int,
-        help="Do not download any videos with more than COUNT views",)
-    selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
-    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
-                         help='download only videos suitable for the given age',
-                         default=None, type=int)
-    selection.add_option('--download-archive', metavar='FILE',
-                         dest='download_archive',
-                         help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
-    selection.add_option(
-        '--include-ads', dest='include_ads',
-        action='store_true',
-        help='Download advertisements as well (experimental)')
-    selection.add_option(
-        '--youtube-include-dash-manifest', action='store_true',
-        dest='youtube_include_dash_manifest', default=False,
-        help='Try to download the DASH manifest on YouTube videos (experimental)')
-
-    authentication.add_option('-u', '--username',
-            dest='username', metavar='USERNAME', help='account username')
-    authentication.add_option('-p', '--password',
-            dest='password', metavar='PASSWORD', help='account password')
-    authentication.add_option('-2', '--twofactor',
-            dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code')
-    authentication.add_option('-n', '--netrc',
-            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
-    authentication.add_option('--video-password',
-            dest='videopassword', metavar='PASSWORD', help='video password (vimeo, smotri)')
-
-
-    video_format.add_option('-f', '--format',
-            action='store', dest='format', metavar='FORMAT', default=None,
-            help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.')
-    video_format.add_option('--all-formats',
-            action='store_const', dest='format', help='download all available video formats', const='all')
-    video_format.add_option('--prefer-free-formats',
-            action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
-    video_format.add_option('--max-quality',
-            action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
-    video_format.add_option('-F', '--list-formats',
-            action='store_true', dest='listformats', help='list all available formats')
-
-    subtitles.add_option('--write-sub', '--write-srt',
-            action='store_true', dest='writesubtitles',
-            help='write subtitle file', default=False)
-    subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
-            action='store_true', dest='writeautomaticsub',
-            help='write automatic subtitle file (youtube only)', default=False)
-    subtitles.add_option('--all-subs',
-            action='store_true', dest='allsubtitles',
-            help='downloads all the available subtitles of the video', default=False)
-    subtitles.add_option('--list-subs',
-            action='store_true', dest='listsubtitles',
-            help='lists all available subtitles for the video', default=False)
-    subtitles.add_option('--sub-format',
-            action='store', dest='subtitlesformat', metavar='FORMAT',
-            help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
-    subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
-            action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
-            default=[], callback=_comma_separated_values_options_callback,
-            help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
-
-    downloader.add_option('-r', '--rate-limit',
-            dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
-    downloader.add_option('-R', '--retries',
-            dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
-    downloader.add_option('--buffer-size',
-            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024")
-    downloader.add_option('--no-resize-buffer',
-            action='store_true', dest='noresizebuffer',
-            help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
-    downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
-
-    workarounds.add_option(
-        '--encoding', dest='encoding', metavar='ENCODING',
-        help='Force the specified encoding (experimental)')
-    workarounds.add_option(
-        '--no-check-certificate', action='store_true',
-        dest='no_check_certificate', default=False,
-        help='Suppress HTTPS certificate validation.')
-    workarounds.add_option(
-        '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
-        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
-    workarounds.add_option(
-        '--user-agent', metavar='UA',
-        dest='user_agent', help='specify a custom user agent')
-    workarounds.add_option(
-        '--referer', metavar='REF',
-        dest='referer', default=None,
-        help='specify a custom referer, use if the video access is restricted to one domain',
-    )
-    workarounds.add_option(
-        '--add-header', metavar='FIELD:VALUE',
-        dest='headers', action='append',
-        help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
-    )
-    workarounds.add_option(
-        '--bidi-workaround', dest='bidi_workaround', action='store_true',
-        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
-
-    verbosity.add_option('-q', '--quiet',
-            action='store_true', dest='quiet', help='activates quiet mode', default=False)
-    verbosity.add_option(
-        '--no-warnings',
-        dest='no_warnings', action='store_true', default=False,
-        help='Ignore warnings')
-    verbosity.add_option('-s', '--simulate',
-            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
-    verbosity.add_option('--skip-download',
-            action='store_true', dest='skip_download', help='do not download the video', default=False)
-    verbosity.add_option('-g', '--get-url',
-            action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
-    verbosity.add_option('-e', '--get-title',
-            action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
-    verbosity.add_option('--get-id',
-            action='store_true', dest='getid', help='simulate, quiet but print id', default=False)
-    verbosity.add_option('--get-thumbnail',
-            action='store_true', dest='getthumbnail',
-            help='simulate, quiet but print thumbnail URL', default=False)
-    verbosity.add_option('--get-description',
-            action='store_true', dest='getdescription',
-            help='simulate, quiet but print video description', default=False)
-    verbosity.add_option('--get-duration',
-            action='store_true', dest='getduration',
-            help='simulate, quiet but print video length', default=False)
-    verbosity.add_option('--get-filename',
-            action='store_true', dest='getfilename',
-            help='simulate, quiet but print output filename', default=False)
-    verbosity.add_option('--get-format',
-            action='store_true', dest='getformat',
-            help='simulate, quiet but print output format', default=False)
-    verbosity.add_option('-j', '--dump-json',
-            action='store_true', dest='dumpjson',
-            help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False)
-    verbosity.add_option('--newline',
-            action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
-    verbosity.add_option('--no-progress',
-            action='store_true', dest='noprogress', help='do not print progress bar', default=False)
-    verbosity.add_option('--console-title',
-            action='store_true', dest='consoletitle',
-            help='display progress in console titlebar', default=False)
-    verbosity.add_option('-v', '--verbose',
-            action='store_true', dest='verbose', help='print various debugging information', default=False)
-    verbosity.add_option('--dump-intermediate-pages',
-            action='store_true', dest='dump_intermediate_pages', default=False,
-            help='print downloaded pages to debug problems (very verbose)')
-    verbosity.add_option('--write-pages',
-            action='store_true', dest='write_pages', default=False,
-            help='Write downloaded intermediary pages to files in the current directory to debug problems')
-    verbosity.add_option('--youtube-print-sig-code',
-            action='store_true', dest='youtube_print_sig_code', default=False,
-            help=optparse.SUPPRESS_HELP)
-    verbosity.add_option('--print-traffic',
-            dest='debug_printtraffic', action='store_true', default=False,
-            help='Display sent and read HTTP traffic')
-
-
-    filesystem.add_option('-a', '--batch-file',
-            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
-    filesystem.add_option('--id',
-            action='store_true', dest='useid', help='use only video ID in file name', default=False)
-    filesystem.add_option('-A', '--auto-number',
-            action='store_true', dest='autonumber',
-            help='number downloaded files starting from 00000', default=False)
-    filesystem.add_option('-o', '--output',
-            dest='outtmpl', metavar='TEMPLATE',
-            help=('output filename template. Use %(title)s to get the title, '
-                  '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
-                  '%(autonumber)s to get an automatically incremented number, '
-                  '%(ext)s for the filename extension, '
-                  '%(format)s for the format description (like "22 - 1280x720" or "HD"), '
-                  '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
-                  '%(upload_date)s for the upload date (YYYYMMDD), '
-                  '%(extractor)s for the provider (youtube, metacafe, etc), '
-                  '%(id)s for the video id, %(playlist)s for the playlist the video is in, '
-                  '%(playlist_index)s for the position in the playlist and %% for a literal percent. '
-                  '%(height)s and %(width)s for the width and height of the video format. '
-                  '%(resolution)s for a textual description of the resolution of the video format. '
-                  'Use - to output to stdout. Can also be used to download to a different directory, '
-                  'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
-    filesystem.add_option('--autonumber-size',
-            dest='autonumber_size', metavar='NUMBER',
-            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
-    filesystem.add_option('--restrict-filenames',
-            action='store_true', dest='restrictfilenames',
-            help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
-    filesystem.add_option('-t', '--title',
-            action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False)
-    filesystem.add_option('-l', '--literal',
-            action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
-    filesystem.add_option('-w', '--no-overwrites',
-            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
-    filesystem.add_option('-c', '--continue',
-            action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
-    filesystem.add_option('--no-continue',
-            action='store_false', dest='continue_dl',
-            help='do not resume partially downloaded files (restart from beginning)')
-    filesystem.add_option('--no-part',
-            action='store_true', dest='nopart', help='do not use .part files', default=False)
-    filesystem.add_option('--no-mtime',
-            action='store_false', dest='updatetime',
-            help='do not use the Last-modified header to set the file modification time', default=True)
-    filesystem.add_option('--write-description',
-            action='store_true', dest='writedescription',
-            help='write video description to a .description file', default=False)
-    filesystem.add_option('--write-info-json',
-            action='store_true', dest='writeinfojson',
-            help='write video metadata to a .info.json file', default=False)
-    filesystem.add_option('--write-annotations',
-            action='store_true', dest='writeannotations',
-            help='write video annotations to a .annotation file', default=False)
-    filesystem.add_option('--write-thumbnail',
-            action='store_true', dest='writethumbnail',
-            help='write thumbnail image to disk', default=False)
-    filesystem.add_option('--load-info',
-            dest='load_info_filename', metavar='FILE',
-            help='json file containing the video information (created with the "--write-json" option)')
-    filesystem.add_option('--cookies',
-            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
-    filesystem.add_option(
-        '--cache-dir', dest='cachedir', default=None, metavar='DIR',
-        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
-    filesystem.add_option(
-        '--no-cache-dir', action='store_const', const=False, dest='cachedir',
-        help='Disable filesystem caching')
-    filesystem.add_option(
-        '--rm-cache-dir', action='store_true', dest='rm_cachedir',
-        help='Delete all filesystem cache files')
-
-
-    postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
-            help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
-    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
-            help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default')
-    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
-            help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
-    postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
-            help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)')
-    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
-            help='keeps the video file on disk after the post-processing; the video is erased by default')
-    postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
-            help='do not overwrite post-processed files; the post-processed files are overwritten by default')
-    postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
-            help='embed subtitles in the video (only for mp4 videos)')
-    postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
-            help='embed thumbnail in the audio as cover art')
-    postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
-            help='write metadata to the video file')
-    postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
-            help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
-    postproc.add_option('--prefer-avconv', action='store_false', dest='prefer_ffmpeg',
-        help='Prefer avconv over ffmpeg for running the postprocessors (default)')
-    postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg',
-        help='Prefer ffmpeg over avconv for running the postprocessors')
-    postproc.add_option(
-        '--exec', metavar='CMD', dest='exec_cmd',
-        help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )
-
-    parser.add_option_group(general)
-    parser.add_option_group(selection)
-    parser.add_option_group(downloader)
-    parser.add_option_group(filesystem)
-    parser.add_option_group(verbosity)
-    parser.add_option_group(workarounds)
-    parser.add_option_group(video_format)
-    parser.add_option_group(subtitles)
-    parser.add_option_group(authentication)
-    parser.add_option_group(postproc)
-
-    if overrideArguments is not None:
-        opts, args = parser.parse_args(overrideArguments)
-        if opts.verbose:
-            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
-    else:
-        commandLineConf = sys.argv[1:]
-        if '--ignore-config' in commandLineConf:
-            systemConf = []
-            userConf = []
-        else:
-            systemConf = _readOptions('/etc/youtube-dl.conf')
-            if '--ignore-config' in systemConf:
-                userConf = []
-            else:
-                userConf = _readUserConf()
-        argv = systemConf + userConf + commandLineConf
-
-        opts, args = parser.parse_args(argv)
-        if opts.verbose:
-            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
-            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
-            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
-
-    return parser, opts, args
-
-
 def _real_main(argv=None):
     # Compatibility fixes for Windows
     if sys.platform == 'win32':
index 5a02bea05a0c202316c722be54609de26d213408..e9fceae4c8921ba80b6b659253a46f4e65621f27 100644 (file)
@@ -47,6 +47,7 @@ from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
+from .cloudy import CloudyIE
 from .clubic import ClubicIE
 from .cmt import CMTIE
 from .cnet import CNETIE
@@ -69,6 +70,7 @@ from .dailymotion import (
 )
 from .daum import DaumIE
 from .dbtv import DBTVIE
+from .deezer import DeezerPlaylistIE
 from .dfb import DFBIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
@@ -82,6 +84,7 @@ from .dropbox import DropboxIE
 from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
+from .einthusan import EinthusanIE
 from .eitb import EitbIE
 from .ellentv import (
     EllenTVIE,
@@ -141,6 +144,7 @@ from .hark import HarkIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hornbunny import HornBunnyIE
+from .hostingbulk import HostingBulkIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .howstuffworks import HowStuffWorksIE
@@ -266,6 +270,7 @@ from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
 from .pornhub import PornHubIE
 from .pornotube import PornotubeIE
+from .pornoxo import PornoXOIE
 from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .pyvideo import PyvideoIE
@@ -324,7 +329,7 @@ from .southpark import (
 )
 from .space import SpaceIE
 from .spankwire import SpankwireIE
-from .spiegel import SpiegelIE
+from .spiegel import SpiegelIE, SpiegelArticleIE
 from .spiegeltv import SpiegeltvIE
 from .spike import SpikeIE
 from .sportdeutschland import SportDeutschlandIE
@@ -345,6 +350,7 @@ from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
+from .telemb import TeleMBIE
 from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
@@ -361,6 +367,7 @@ from .trutube import TruTubeIE
 from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
+from .turbo import TurboIE
 from .tutv import TutvIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE
index a00bfcb35fc8f61b2192592a57776921d0dee9f6..b4b40f2d4f21432f6b12a883513ae00827af00e5 100644 (file)
@@ -75,7 +75,9 @@ class AdultSwimIE(InfoExtractor):
         video_path = mobj.group('path')
 
         webpage = self._download_webpage(url, video_path)
-        episode_id = self._html_search_regex(r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', webpage, 'episode_id')
+        episode_id = self._html_search_regex(
+            r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
+            webpage, 'episode_id')
         title = self._og_search_title(webpage)
 
         index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
@@ -97,7 +99,9 @@ class AdultSwimIE(InfoExtractor):
             duration = segment_el.attrib.get('duration')
 
             segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
-            idoc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information')
+            idoc = self._download_xml(
+                segment_url, segment_title,
+                'Downloading segment information', 'Unable to download segment information')
 
             formats = []
             file_els = idoc.findall('.//files/file')
index ef94c72395723b31bd444e80b6ba12d990acf38b..12457f0f996db46d48823836c50e98048162c83c 100644 (file)
@@ -13,6 +13,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     unified_strdate,
+    xpath_text,
 )
 
 
@@ -157,8 +158,9 @@ class ARDIE(InfoExtractor):
         player_url = mobj.group('mainurl') + '~playerXml.xml'
         doc = self._download_xml(player_url, display_id)
         video_node = doc.find('./video')
-        upload_date = unified_strdate(video_node.find('./broadcastDate').text)
-        thumbnail = video_node.find('.//teaserImage//variant/url').text
+        upload_date = unified_strdate(xpath_text(
+            video_node, './broadcastDate'))
+        thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
 
         formats = []
         for a in video_node.findall('.//asset'):
index 86f0c2861e35f296f594a4ac45bbfe74b799d9e0..4e2960c6260ebf6bf0d242a6b0bfc38baf40b25c 100644 (file)
@@ -28,17 +28,6 @@ class BRIE(InfoExtractor):
                 'duration': 34,
             }
         },
-        {
-            'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html',
-            'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe',
-            'info_dict': {
-                'id': '2c060e69-3a27-4e13-b0f0-668fac17d812',
-                'ext': 'mp4',
-                'title': 'Über den Pass',
-                'description': 'Die Eroberung der Alpen: Über den Pass',
-                'duration': 2588,
-            }
-        },
         {
             'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
             'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
index a62395d4b727ce917f1ea946b63940b3f52b6bdd..c922f695905d70e4052ddfa5c8f336c01221413b 100644 (file)
@@ -42,7 +42,7 @@ class ChilloutzoneIE(InfoExtractor):
             'id': '85523671',
             'ext': 'mp4',
             'title': 'The Sunday Times - Icons',
-            'description': 'md5:a5f7ff82e2f7a9ed77473fe666954e84',
+            'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}',
             'uploader': 'Us',
             'uploader_id': 'usfilms',
             'upload_date': '20140131'
diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py
new file mode 100644 (file)
index 0000000..386f080
--- /dev/null
@@ -0,0 +1,108 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_parse_qs,
+    compat_urllib_parse,
+    remove_end,
+    HEADRequest,
+    compat_HTTPError,
+)
+
+
+class CloudyIE(InfoExtractor):
+    """Extractor for videos hosted on cloudy.ec and videoraj.ch.
+
+    The embed page is downloaded to obtain a file key; that key is then sent
+    to the site's player API, whose query-string response carries the video
+    URL and title.
+    """
+
+    _IE_DESC = 'cloudy.ec and videoraj.ch'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/
+        (?:v/|embed\.php\?id=)
+        (?P<id>[A-Za-z0-9]+)
+        '''
+    # Filled with (host, video id).
+    _EMBED_URL = 'http://www.%s/embed.php?id=%s'
+    # Filled with (host, urlencoded query string).
+    _API_URL = 'http://www.%s/api/player.api.php?%s'
+    # Maximum number of player API requests made for a single video.
+    _MAX_TRIES = 2
+    _TESTS = [
+        {
+            'url': 'https://www.cloudy.ec/v/af511e2527aac',
+            'md5': '5cb253ace826a42f35b4740539bedf07',
+            'info_dict': {
+                'id': 'af511e2527aac',
+                'ext': 'flv',
+                'title': 'Funny Cats and Animals Compilation june 2013',
+            }
+        },
+        {
+            'url': 'http://www.videoraj.ch/v/47f399fd8bb60',
+            'md5': '7d0f8799d91efd4eda26587421c3c3b0',
+            'info_dict': {
+                'id': '47f399fd8bb60',
+                'ext': 'flv',
+                'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?',
+            }
+        }
+    ]
+
+    def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
+        """Query the player API and return the info dict for one video.
+
+        video_host and video_id are the 'host' and 'id' groups of _VALID_URL,
+        file_key is the key scraped from the embed page. error_url is a video
+        URL previously returned by the API that failed the HEAD check (if
+        any); it is reported back to the API together with try_num, the
+        number of API requests already made for this video.
+
+        Raises ExtractorError when _MAX_TRIES requests have been used up,
+        when the API reports an error, or when it returns no video URL.
+        """
+
+        if try_num > self._MAX_TRIES - 1:
+            raise ExtractorError('Unable to extract video URL', expected=True)
+
+        form = {
+            'file': video_id,
+            'key': file_key,
+        }
+
+        if error_url:
+            # Retry: report the dead URL back to the API.
+            form.update({
+                'numOfErrors': try_num,
+                'errorCode': '404',
+                'errorUrl': error_url,
+            })
+
+        data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form))
+        player_data = self._download_webpage(
+            data_url, video_id, 'Downloading player data')
+        # The response is parsed as a query string: every value is a list.
+        data = compat_parse_qs(player_data)
+
+        try_num += 1
+
+        if 'error' in data:
+            # 'error_msg' may be absent; do not let a KeyError replace the
+            # ExtractorError that reports the API failure.
+            raise ExtractorError(
+                '%s error: %s' % (self.IE_NAME, ' '.join(data.get('error_msg', []))),
+                expected=True)
+
+        title = data.get('title', [None])[0]
+        if title:
+            title = remove_end(title, '&asdasdas').strip()
+
+        video_url = data.get('url', [None])[0]
+        if not video_url:
+            # Fail with a clear error instead of returning an info dict whose
+            # 'url' is None.
+            raise ExtractorError('Unable to extract video URL', expected=True)
+
+        try:
+            self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
+                self.report_warning('Invalid video URL, requesting another', video_id)
+                return self._extract_video(video_host, video_id, file_key, video_url, try_num)
+            # Any other failure of the check is ignored on purpose
+            # (best effort): the URL is returned unverified.
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+        }
+
+    def _real_extract(self, url):
+        """Resolve a cloudy.ec / videoraj.ch URL to its info dict.
+
+        Both the /v/<id> and the embed.php?id=<id> forms are normalised to
+        the embed page, from which the file key is read.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_host = mobj.group('host')
+        video_id = mobj.group('id')
+
+        url = self._EMBED_URL % (video_host, video_id)
+        webpage = self._download_webpage(url, video_id)
+
+        file_key = self._search_regex(
+            r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
+
+        return self._extract_video(video_host, video_id, file_key)
index 6033cd94a1b251d66e7a3f80034bc58b79fa4b55..45d66e2e663fa376cec8f4fc7931e84006ee30b9 100644 (file)
@@ -11,10 +11,10 @@ from ..utils import (
 
 
 class DaumIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:v/|.*?clipid=)(?P<id>[^?#&]+)'
     IE_NAME = 'daum.net'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
         'info_dict': {
             'id': '52554690',
@@ -24,11 +24,17 @@ class DaumIE(InfoExtractor):
             'upload_date': '20130831',
             'duration': 3868,
         },
-    }
+    }, {
+        'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
+        'only_matching': True,
+    }, {
+        'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')
         canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
         webpage = self._download_webpage(canonical_url, video_id)
         full_id = self._search_regex(
@@ -42,7 +48,6 @@ class DaumIE(InfoExtractor):
             'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
             video_id, 'Downloading video formats info')
 
-        self.to_screen(u'%s: Getting video urls' % video_id)
         formats = []
         for format_el in urls.findall('result/output_list/output_list'):
             profile = format_el.attrib['profile']
@@ -52,7 +57,7 @@ class DaumIE(InfoExtractor):
             })
             url_doc = self._download_xml(
                 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
-                video_id, note=False)
+                video_id, note='Downloading video data for %s format' % profile)
             format_url = url_doc.find('result/url').text
             formats.append({
                 'url': format_url,
diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py
new file mode 100644 (file)
index 0000000..c3205ff
--- /dev/null
@@ -0,0 +1,89 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    orderedSet,
+)
+
+
+class DeezerPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?deezer\.com/playlist/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.deezer.com/playlist/176747451',
+        'info_dict': {
+            'id': '176747451',
+            'title': 'Best!',
+            'uploader': 'Anonymous',
+            'thumbnail': 're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
+        },
+        'playlist_count': 30,
+        'skip': 'Only available in .de',
+    }
+
+    def _real_extract(self, url):
+        if 'test' not in self._downloader.params:
+            self._downloader.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')
+
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, playlist_id)
+        geoblocking_msg = self._html_search_regex(
+            r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message',
+            default=None)
+        if geoblocking_msg is not None:
+            raise ExtractorError(
+                'Deezer said: %s' % geoblocking_msg, expected=True)
+
+        data_json = self._search_regex(
+            r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n', webpage, 'data JSON')
+        data = json.loads(data_json)
+
+        playlist_title = data.get('DATA', {}).get('TITLE')
+        playlist_uploader = data.get('DATA', {}).get('PARENT_USERNAME')
+        playlist_thumbnail = self._search_regex(
+            r'<img id="naboo_playlist_image".*?src="([^"]+)"', webpage,
+            'playlist thumbnail')
+
+        preview_pattern = self._search_regex(
+            r"var SOUND_PREVIEW_GATEWAY\s*=\s*'([^']+)';", webpage,
+            'preview URL pattern', fatal=False)
+        entries = []
+        for s in data['SONGS']['data']:
+            puid = s['MD5_ORIGIN']
+            preview_video_url = preview_pattern.\
+                replace('{0}', puid[0]).\
+                replace('{1}', puid).\
+                replace('{2}', s['MEDIA_VERSION'])
+            formats = [{
+                'format_id': 'preview',
+                'url': preview_video_url,
+                'preference': -100,  # Only the first 30 seconds
+                'ext': 'mp3',
+            }]
+            self._sort_formats(formats)
+            artists = ', '.join(
+                orderedSet(a['ART_NAME'] for a in s['ARTISTS']))
+            entries.append({
+                'id': s['SNG_ID'],
+                'duration': int_or_none(s.get('DURATION')),
+                'title': '%s - %s' % (artists, s['SNG_TITLE']),
+                'uploader': s['ART_NAME'],
+                'uploader_id': s['ART_ID'],
+                'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
+                'formats': formats,
+            })
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': playlist_title,
+            'uploader': playlist_uploader,
+            'thumbnail': playlist_thumbnail,
+            'entries': entries,
+        }
index cdccfd376b80ee5ebc61c25ab4cd00e12dcfc458..9d6ce1f48cd41c390da308768f131b6c44521629 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import parse_iso8601
 
 
 class DRTVIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P<id>[\da-z-]+)'
+    _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)+(?P<id>[\da-z-]+)(?:[/#?]|$)'
 
     _TEST = {
         'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py
new file mode 100644 (file)
index 0000000..5dfea0d
--- /dev/null
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class EinthusanIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)'
+    _TESTS = [
+        {
+            'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
+            'md5': 'af244f4458cd667205e513d75da5b8b1',
+            'info_dict': {
+                'id': '2447',
+                'ext': 'mp4',
+                'title': 'Ek Villain',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
+            }
+        },
+        {
+            'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
+            'md5': 'ef63c7a803e22315880ed182c10d1c5c',
+            'info_dict': {
+                'id': '1671',
+                'ext': 'mp4',
+                'title': 'Soodhu Kavvuum',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51',
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        video_title = self._html_search_regex(
+            r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title')
+
+        video_url = self._html_search_regex(
+            r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''',
+            webpage, 'video url')
+
+        description = self._html_search_meta('description', webpage)
+        thumbnail = self._html_search_regex(
+            r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
+            webpage, "thumbnail url", fatal=False)
+        if thumbnail is not None:
+            thumbnail = thumbnail.replace('..', 'http://www.einthusan.com')
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'url': video_url,
+            'thumbnail': thumbnail,
+            'description': description,
+        }
index afb34ce511f1bf2f526a03acc9083df2eba2a77c..60e68d98ac68ec5f0ccff4413af70a54bfd75ced 100644 (file)
@@ -12,8 +12,8 @@ from ..utils import (
     compat_urllib_parse,
     compat_urllib_request,
     urlencode_postdata,
-
     ExtractorError,
+    limit_length,
 )
 
 
@@ -29,13 +29,21 @@ class FacebookIE(InfoExtractor):
     _NETRC_MACHINE = 'facebook'
     IE_NAME = 'facebook'
     _TESTS = [{
-        'url': 'https://www.facebook.com/photo.php?v=120708114770723',
-        'md5': '48975a41ccc4b7a581abd68651c1a5a8',
+        'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
+        'md5': '6a40d33c0eccbb1af76cf0485a052659',
+        'info_dict': {
+            'id': '637842556329505',
+            'ext': 'mp4',
+            'duration': 38,
+            'title': 'Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam fin...',
+        }
+    }, {
+        'note': 'Video without discernible title',
+        'url': 'https://www.facebook.com/video.php?v=274175099429670',
         'info_dict': {
-            'id': '120708114770723',
+            'id': '274175099429670',
             'ext': 'mp4',
-            'duration': 279,
-            'title': 'PEOPLE ARE AWESOME 2013',
+            'title': 'Facebook video #274175099429670',
         }
     }, {
         'url': 'https://www.facebook.com/video.php?v=10204634152394104',
@@ -125,7 +133,15 @@ class FacebookIE(InfoExtractor):
             raise ExtractorError('Cannot find video URL')
 
         video_title = self._html_search_regex(
-            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')
+            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
+            fatal=False)
+        if not video_title:
+            video_title = self._html_search_regex(
+                r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
+                webpage, 'alternative title', default=None)
+            video_title = limit_length(video_title, 80)
+        if not video_title:
+            video_title = 'Facebook video #%s' % video_id
 
         return {
             'id': video_id,
index 1b7697870bde93fd5bb4218d12eac8c166a306ba..2bfa20606cd7846b0d15e8c441de3fce2a8982f6 100644 (file)
@@ -628,7 +628,7 @@ class GenericIE(InfoExtractor):
                 embedSWF\(?:\s*
             )
             (["\'])
-                (?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
+                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                 (?:embed|v)/.+?)
             \1''', webpage)
         if matches:
diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py
new file mode 100644 (file)
index 0000000..8e812b6
--- /dev/null
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_request,
+    int_or_none,
+    urlencode_postdata,
+)
+
+
+class HostingBulkIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?hostingbulk\.com/
+        (?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html'''
+    _FILE_DELETED_REGEX = r'<b>File Not Found</b>'
+    _TEST = {
+        'url': 'http://hostingbulk.com/n0ulw1hv20fm.html',
+        'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f',
+        'info_dict': {
+            'id': 'n0ulw1hv20fm',
+            'ext': 'mp4',
+            'title': 'md5:5afeba33f48ec87219c269e054afd622',
+            'filesize': 6816081,
+            'thumbnail': 're:^http://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        url = 'http://hostingbulk.com/{0:}.html'.format(video_id)
+
+        # Custom request with cookie to set language to English, so our file
+        # deleted regex would work.
+        request = compat_urllib_request.Request(
+            url, headers={'Cookie': 'lang=english'})
+        webpage = self._download_webpage(request, video_id)
+
+        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id,
+                                 expected=True)
+
+        title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title')
+        filesize = int_or_none(
+            self._search_regex(
+                r'<small>\((\d+)\sbytes?\)</small>',
+                webpage,
+                'filesize',
+                fatal=False
+            )
+        )
+        thumbnail = self._search_regex(
+            r'<img src="([^"]+)".+?class="pic"',
+            webpage, 'thumbnail', fatal=False)
+
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            value="([^"]*)"
+            ''', webpage))
+
+        request = compat_urllib_request.Request(url, urlencode_postdata(fields))
+        request.add_header('Content-type', 'application/x-www-form-urlencoded')
+        response = self._request_webpage(request, video_id,
+                                         'Submiting download request')
+        video_url = response.geturl()
+
+        formats = [{
+            'format_id': 'sd',
+            'filesize': filesize,
+            'url': video_url,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 79e8430b5e6a85d59ede8490eac5405950c5dabc..a83dd249f6cd5694884158de6471802df6fe2d01 100644 (file)
@@ -9,29 +9,50 @@ from ..utils import (
     parse_iso8601,
     determine_ext,
     int_or_none,
+    float_or_none,
     str_to_int,
 )
 
 
 class IzleseneIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
-    _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
-    _TEST = {
-        'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
-        'md5': '4384f9f0ea65086734b881085ee05ac2',
-        'info_dict': {
-            'id': '7599694',
-            'ext': 'mp4',
-            'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
-            'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
-            'thumbnail': 're:^http://.*\.jpg',
-            'uploader_id': 'pelikzzle',
-            'timestamp': 1404298698,
-            'upload_date': '20140702',
-            'duration': 95.395,
-            'age_limit': 0,
-        }
-    }
+    _VALID_URL = r'''(?x)
+        https?://(?:(?:www|m)\.)?izlesene\.com/
+        (?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)
+        '''
+    _TESTS = [
+        {
+            'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
+            'md5': '4384f9f0ea65086734b881085ee05ac2',
+            'info_dict': {
+                'id': '7599694',
+                'ext': 'mp4',
+                'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
+                'description': 'md5:253753e2655dde93f59f74b572454f6d',
+                'thumbnail': 're:^http://.*\.jpg',
+                'uploader_id': 'pelikzzle',
+                'timestamp': 1404298698,
+                'upload_date': '20140702',
+                'duration': 95.395,
+                'age_limit': 0,
+            }
+        },
+        {
+            'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997',
+            'md5': '97f09b6872bffa284cb7fa4f6910cb72',
+            'info_dict': {
+                'id': '17997',
+                'ext': 'mp4',
+                'title': 'Tarkan Dortmund 2006 Konseri',
+                'description': 'Tarkan Dortmund 2006 Konseri',
+                'thumbnail': 're:^http://.*\.jpg',
+                'uploader_id': 'parlayankiz',
+                'timestamp': 1163318593,
+                'upload_date': '20061112',
+                'duration': 253.666,
+                'age_limit': 0,
+            }
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -45,18 +66,19 @@ class IzleseneIE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(webpage)
 
         uploader = self._html_search_regex(
-            r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
+            r"adduserUsername\s*=\s*'([^']+)';",
+            webpage, 'uploader', fatal=False, default='')
         timestamp = parse_iso8601(self._html_search_meta(
             'uploadDate', webpage, 'upload date', fatal=False))
 
-        duration = int_or_none(self._html_search_regex(
-            r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
-        if duration:
-            duration /= 1000.0
+        duration = float_or_none(self._html_search_regex(
+            r'"videoduration"\s*:\s*"([^"]+)"',
+            webpage, 'duration', fatal=False), scale=1000)
 
         view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
         comment_count = self._html_search_regex(
-            r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'uploader', fatal=False)
+            r'comment_count\s*=\s*\'([^\']+)\';',
+            webpage, 'comment_count', fatal=False)
 
         family_friendly = self._html_search_meta(
             'isFamilyFriendly', webpage, 'age limit', fatal=False)
@@ -66,20 +88,26 @@ class IzleseneIE(InfoExtractor):
         ext = determine_ext(content_url, 'mp4')
 
         # Might be empty for some videos.
-        qualities = self._html_search_regex(
-            r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')
+        streams = self._html_search_regex(
+            r'"qualitylevel"\s*:\s*"([^"]+)"',
+            webpage, 'streams', fatal=False, default='')
 
         formats = []
-        for quality in qualities.split('|'):
-            json = self._download_json(
-                self._STREAM_URL.format(id=video_id, format=quality), video_id,
-                note='Getting video URL for "%s" quality' % quality,
-                errnote='Failed to get video URL for "%s" quality' % quality
-            )
+        if streams:
+            for stream in streams.split('|'):
+                quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
+                formats.append({
+                    'format_id': '%sp' % quality if quality else 'sd',
+                    'url': url,
+                    'ext': ext,
+                })
+        else:
+            stream_url = self._search_regex(
+                r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL')
             formats.append({
-                'url': json.get('streamurl'),
+                'format_id': 'sd',
+                'url': stream_url,
                 'ext': ext,
-                'format_id': '%sp' % quality if quality else 'sd',
             })
 
         return {
index 095965add8d6126df80d1fa724f446fb735c9d9b..f3be8f552c3764995057acf18b74514537960d4e 100644 (file)
@@ -5,24 +5,26 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    compat_urllib_parse,
+    ExtractorError,
     compat_urllib_request,
+    urlencode_postdata,
+    xpath_text,
     xpath_with_ns,
 )
 
 _x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
-_find = lambda el, p: el.find(_x(p)).text.strip()
 
 
 class NosVideoIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \
                  '(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?'
     _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml'
+    _FILE_DELETED_REGEX = r'<b>File Not Found</b>'
     _TEST = {
-        'url': 'http://nosvideo.com/?v=drlp6s40kg54',
-        'md5': '4b4ac54c6ad5d70ab88f2c2c6ccec71c',
+        'url': 'http://nosvideo.com/?v=mu8fle7g7rpq',
+        'md5': '6124ed47130d8be3eacae635b071e6b6',
         'info_dict': {
-            'id': 'drlp6s40kg54',
+            'id': 'mu8fle7g7rpq',
             'ext': 'mp4',
             'title': 'big_buck_bunny_480p_surround-fix.avi.mp4',
             'thumbnail': 're:^https?://.*\.jpg$',
@@ -38,19 +40,28 @@ class NosVideoIE(InfoExtractor):
             'op': 'download1',
             'method_free': 'Continue to Video',
         }
-        post = compat_urllib_parse.urlencode(fields)
-        req = compat_urllib_request.Request(url, post)
+        req = compat_urllib_request.Request(url, urlencode_postdata(fields))
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
         webpage = self._download_webpage(req, video_id,
                                          'Downloading download page')
+        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id,
+                                 expected=True)
+
         xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID')
         playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id)
         playlist = self._download_xml(playlist_url, video_id)
 
         track = playlist.find(_x('.//xspf:track'))
-        title = _find(track, './xspf:title')
-        url = _find(track, './xspf:file')
-        thumbnail = _find(track, './xspf:image')
+        if track is None:
+            raise ExtractorError(
+                'XML playlist is missing the \'track\' element',
+                expected=True)
+        title = xpath_text(track, _x('./xspf:title'), 'title')
+        url = xpath_text(track, _x('./xspf:file'), 'URL', fatal=True)
+        thumbnail = xpath_text(track, _x('./xspf:image'), 'thumbnail')
+        if title is not None:
+            title = title.strip()
 
         formats = [{
             'format_id': 'sd',
index 718fe9aba5fc710ee5efc47c2bbae2b02fc3c117..48ce6e7303e37463f991bb7e74241987b833297a 100644 (file)
@@ -27,47 +27,40 @@ class PornHdIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        title = self._og_search_title(webpage)
-        TITLE_SUFFIX = ' porn HD Video | PornHD.com '
-        if title.endswith(TITLE_SUFFIX):
-            title = title[:-len(TITLE_SUFFIX)]
-
+        title = self._html_search_regex(
+            r'<title>(.+) porn HD.+?</title>', webpage, 'title')
         description = self._html_search_regex(
             r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
         view_count = int_or_none(self._html_search_regex(
-            r'(\d+) views      </span>', webpage, 'view count', fatal=False))
+            r'(\d+) views\s*</span>', webpage, 'view count', fatal=False))
 
-        formats = [
-            {
-                'url': format_url,
-                'ext': format.lower(),
-                'format_id': '%s-%s' % (format.lower(), quality.lower()),
-                'quality': 1 if quality.lower() == 'high' else 0,
-            } for format, quality, format_url in re.findall(
-                r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
-        ]
+        videos = re.findall(
+            r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
 
         mobj = re.search(r'flashVars = (?P<flashvars>{.+?});', webpage)
         if mobj:
             flashvars = json.loads(mobj.group('flashvars'))
-            formats.extend([
-                {
-                    'url': flashvars['hashlink'].replace('?noProxy=1', ''),
-                    'ext': 'flv',
-                    'format_id': 'flv-low',
-                    'quality': 0,
-                },
-                {
-                    'url': flashvars['hd'].replace('?noProxy=1', ''),
-                    'ext': 'flv',
-                    'format_id': 'flv-high',
-                    'quality': 1,
-                }
-            ])
+            for key, quality in [('hashlink', 'low'), ('hd', 'high')]:
+                redirect_url = flashvars.get(key)
+                if redirect_url:
+                    videos.append(('flv', quality, redirect_url))
             thumbnail = flashvars['urlWallpaper']
         else:
             thumbnail = self._og_search_thumbnail(webpage)
 
+        formats = []
+        for format_, quality, redirect_url in videos:
+            format_id = '%s-%s' % (format_.lower(), quality.lower())
+            video_url = self._download_webpage(
+                redirect_url, video_id, 'Downloading %s video link' % format_id, fatal=False)
+            if not video_url:
+                continue
+            formats.append({
+                'url': video_url,
+                'ext': format_.lower(),
+                'format_id': format_id,
+                'quality': 1 if quality.lower() == 'high' else 0,
+            })
         self._sort_formats(formats)
 
         return {
diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py
new file mode 100644 (file)
index 0000000..202f586
--- /dev/null
@@ -0,0 +1,65 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    str_to_int,
+)
+
+
+class PornoXOIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
+    _TEST = {
+        'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
+        'md5': '582f28ecbaa9e6e24cb90f50f524ce87',
+        'info_dict': {
+            'id': '7564',
+            'ext': 'flv',
+            'title': 'Striptease From Sexy Secretary!',
+            'description': 'Striptease From Sexy Secretary!',
+            'categories': list,  # NSFW
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'\'file\'\s*:\s*"([^"]+)"', webpage, 'video_url')
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title')
+
+        description = self._html_search_regex(
+            r'<meta name="description" content="([^"]+)\s*featuring',
+            webpage, 'description', fatal=False)
+
+        thumbnail = self._html_search_regex(
+            r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
+
+        view_count = str_to_int(self._html_search_regex(
+            r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False))
+
+        categories_str = self._html_search_regex(
+            r'<meta name="description" content=".*featuring\s*([^"]+)"',
+            webpage, 'categories', fatal=False)
+        categories = (
+            None if categories_str is None
+            else categories_str.split(','))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'view_count': view_count,
+            'age_limit': 18,
+        }
index da64a1a7b4c0d8bceb89415894c84d651c7ac566..5b2a723c1d8dce6f05fcdd3647c93b48ad41dc5a 100644 (file)
@@ -145,7 +145,6 @@ class ProSiebenSat1IE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Kurztrips zum Valentinstag',
                 'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528',
-                'upload_date': '20130206',
                 'duration': 307.24,
             },
             'params': {
@@ -240,7 +239,7 @@ class ProSiebenSat1IE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(page)
 
         upload_date = unified_strdate(self._html_search_regex(
-            self._UPLOAD_DATE_REGEXES, page, 'upload date', fatal=False))
+            self._UPLOAD_DATE_REGEXES, page, 'upload date', default=None))
 
         formats = []
 
@@ -249,7 +248,7 @@ class ProSiebenSat1IE(InfoExtractor):
             urls_sources = urls_sources.values()
 
         def fix_bitrate(bitrate):
-            return bitrate / 1000 if bitrate % 1000 == 0 else bitrate
+            return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
 
         for source in urls_sources:
             protocol = source['protocol']
index 340a38440d02ad28b5eb6ab19916eee870818c35..9ed7d3b39e227806971fe98f43e1c1018b84ad3c 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import compat_urlparse
 
 
 class SpiegelIE(InfoExtractor):
@@ -28,16 +29,6 @@ class SpiegelIE(InfoExtractor):
             'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
             'duration': 983,
         },
-    }, {
-        'url': 'http://www.spiegel.de/video/johann-westhauser-videobotschaft-des-hoehlenforschers-video-1502367.html',
-        'md5': '54f58ba0e752e3c07bc2a26222dd0acf',
-        'info_dict': {
-            'id': '1502367',
-            'ext': 'mp4',
-            'title': 'Videobotschaft: Höhlenforscher Westhauser dankt seinen Rettern',
-            'description': 'md5:c6f1ec11413ebd1088b6813943e5fc91',
-            'duration': 42,
-        },
     }]
 
     def _real_extract(self, url):
@@ -82,3 +73,34 @@ class SpiegelIE(InfoExtractor):
             'duration': duration,
             'formats': formats,
         }
+
+
+class SpiegelArticleIE(InfoExtractor):
+    _VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
+    IE_NAME = 'Spiegel:Article'
+    IE_DESC = 'Articles on spiegel.de'
+    _TEST = {
+        'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
+        'info_dict': {
+            'id': '1516455',
+            'ext': 'mp4',
+            'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
+            'description': 're:^Patrick Kämnitz gehört.{100,}',
+        },
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        video_link = self._search_regex(
+            r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
+            'video page URL')
+        video_url = compat_urlparse.urljoin(
+            self.http_scheme() + '//spiegel.de/', video_link)
+
+        return {
+            '_type': 'url',
+            'url': video_url,
+        }
index 5d9d703673265ca4a53a54f28e34494d570cb206..13c6ea67728d040a9e1f17111031952492d921b5 100644 (file)
@@ -52,20 +52,6 @@ class SWRMediathekIE(InfoExtractor):
             'uploader': 'SWR 2',
             'uploader_id': '284670',
         }
-    }, {
-        'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6',
-        'md5': '881531487d0633080a8cc88d31ef896f',
-        'info_dict': {
-            'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6',
-            'ext': 'mp4',
-            'title': 'Familienspaß am Bodensee',
-            'description': 'md5:0b591225a32cfde7be1629ed49fe4315',
-            'thumbnail': 're:http://.*\.jpg',
-            'duration': 1784,
-            'upload_date': '20140727',
-            'uploader': 'SWR Fernsehen BW',
-            'uploader_id': '281130',
-        }
     }]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/telemb.py b/youtube_dl/extractor/telemb.py
new file mode 100644 (file)
index 0000000..1bbd0e7
--- /dev/null
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import remove_start
+
+
+class TeleMBIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?telemb\.be/(?P<display_id>.+?)_d_(?P<id>\d+)\.html'
+    _TESTS = [
+        {
+            'url': 'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html',
+            'md5': 'f45ea69878516ba039835794e0f8f783',
+            'info_dict': {
+                'id': '13466',
+                'display_id': 'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-',
+                'ext': 'mp4',
+                'title': 'Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages',
+                'description': 'md5:bc5225f47b17c309761c856ad4776265',
+                'thumbnail': 're:^http://.*\.(?:jpg|png)$',
+            }
+        },
+        {
+            # non-ASCII characters in download URL
+            'url': 'http://telemb.be/les-reportages-havre-incendie-mortel_d_13514.html',
+            'md5': '6e9682736e5ccd4eab7f21e855350733',
+            'info_dict': {
+                'id': '13514',
+                'display_id': 'les-reportages-havre-incendie-mortel',
+                'ext': 'mp4',
+                'title': 'Havré - Incendie mortel - Les reportages',
+                'description': 'md5:5e54cb449acb029c2b7734e2d946bd4a',
+                'thumbnail': 're:^http://.*\.(?:jpg|png)$',
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        formats = []
+        for video_url in re.findall(r'file\s*:\s*"([^"]+)"', webpage):
+            fmt = {
+                'url': video_url,
+                'format_id': video_url.split(':')[0]
+            }
+            rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url)
+            if rtmp:
+                fmt.update({
+                    'play_path': rtmp.group('playpath'),
+                    'app': rtmp.group('app'),
+                    'player_url': 'http://p.jwpcdn.com/6/10/jwplayer.flash.swf',
+                    'page_url': 'http://www.telemb.be',
+                    'preference': -1,
+                })
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        title = remove_start(self._og_search_title(webpage), 'TéléMB : ')
+        description = self._html_search_regex(
+            r'<meta property="og:description" content="(.+?)" />',
+            webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 2882c1809e0bd55c1e6c8b441c19293aeb64d301..306fe89741cce8b3c281c94349be266f221028b3 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import (
 
 
 class TumblrIE(InfoExtractor):
-    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)'
+    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
     _TESTS = [{
         'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
         'md5': '479bb068e5b16462f5176a6828829767',
@@ -56,13 +56,15 @@ class TumblrIE(InfoExtractor):
 
         # The only place where you can get a title, it's not complete,
         # but searching in other places doesn't work for all videos
-        video_title = self._html_search_regex(r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
-            webpage, 'title', flags=re.DOTALL)
+        video_title = self._html_search_regex(
+            r'(?s)<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
+            webpage, 'title')
 
-        return [{'id': video_id,
-                 'url': video_url,
-                 'title': video_title,
-                 'description': self._html_search_meta('description', webpage),
-                 'thumbnail': video_thumbnail,
-                 'ext': ext
-                 }]
+        return {
+            'id': video_id,
+             'url': video_url,
+             'title': video_title,
+             'description': self._html_search_meta('description', webpage),
+             'thumbnail': video_thumbnail,
+             'ext': ext,
+        }
diff --git a/youtube_dl/extractor/turbo.py b/youtube_dl/extractor/turbo.py
new file mode 100644 (file)
index 0000000..29703a8
--- /dev/null
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    qualities,
+    xpath_text,
+)
+
+
+class TurboIE(InfoExtractor):  # extractor for turbo.fr "videos-voiture" pages
+    _VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P<id>[0-9]+)-'
+    _API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}'  # {0:} is filled with the video id
+    _TEST = {
+        'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
+        'md5': '33f4b91099b36b5d5a91f84b5bcba600',
+        'info_dict': {
+            'id': '454443',
+            'ext': 'mp4',
+            'duration': 3715,
+            'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
+            'description': 'Retrouvez dans cette rubrique toutes les vidéos de l\'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
+            'thumbnail': r're:^https?://.*\.jpg$',  # raw string: '\.' is an invalid escape in a normal literal
+        }
+    }
+
+    def _real_extract(self, url):  # returns the info dict for one turbo.fr video page
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        playlist = self._download_xml(self._API_URL.format(video_id), video_id)  # XML playlist for this id
+        item = playlist.find('./channel/item')
+        if item is None:
+            raise ExtractorError('Playlist item was not found', expected=True)
+
+        title = xpath_text(item, './title', 'title')
+        duration = int_or_none(xpath_text(item, './durate', 'duration'))
+        thumbnail = xpath_text(item, './visuel_clip', 'thumbnail')
+        description = self._og_search_description(webpage)  # taken from the web page, not the XML item
+
+        formats = []
+        get_quality = qualities(['3g', 'sd', 'hq'])  # NOTE(review): presumably ranks ids by list position - confirm in utils.qualities
+        for child in item:
+            m = re.search(r'url_video_(?P<quality>.+)', child.tag)  # tags named url_video_<quality> carry stream URLs
+            if m:
+                quality = m.group('quality')
+                formats.append({
+                    'format_id': quality,
+                    'url': child.text,
+                    'quality': get_quality(quality),
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'description': description,
+            'formats': formats,
+        }
index a56a7ab5fc2e1c307c9811687ca03b0f4d79e6e5..445e0ec419ccc7eb2e23e522f6f3eba6010dcd69 100644 (file)
@@ -6,13 +6,28 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    compat_str,
     parse_iso8601,
     qualities,
 )
 
 
 class TVPlayIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P<id>\d+)'
+    IE_DESC = 'TV3Play and related services'
+    _VALID_URL = r'''(?x)http://(?:www\.)?
+        (?:tvplay\.lv/parraides|
+           tv3play\.lt/programos|
+           tv3play\.ee/sisu|
+           tv3play\.se/program|
+           tv6play\.se/program|
+           tv8play\.se/program|
+           tv10play\.se/program|
+           tv3play\.no/programmer|
+           viasat4play\.no/programmer|
+           tv6play\.no/programmer|
+           tv3play\.dk/programmer      # last branch: no trailing "|", an empty branch would accept host-less URLs
+        )/[^/]+/(?P<id>\d+)            # show slug, then the numeric video id
+        '''
     _TESTS = [
         {
             'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
@@ -30,6 +45,134 @@ class TVPlayIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.tv3play.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
+            'info_dict': {
+                'id': '409229',
+                'ext': 'flv',
+                'title': 'Moterys meluoja geriau',
+                'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e',
+                'duration': 1330,
+                'timestamp': 1403769181,
+                'upload_date': '20140626',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv3play.ee/sisu/kodu-keset-linna/238551?autostart=true',
+            'info_dict': {
+                'id': '238551',
+                'ext': 'flv',
+                'title': 'Kodu keset linna 398537',
+                'description': 'md5:7df175e3c94db9e47c0d81ffa5d68701',
+                'duration': 1257,
+                'timestamp': 1292449761,
+                'upload_date': '20101215',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true',
+            'info_dict': {
+                'id': '395385',
+                'ext': 'flv',
+                'title': 'Husräddarna S02E07',
+                'description': 'md5:f210c6c89f42d4fc39faa551be813777',
+                'duration': 2574,
+                'timestamp': 1400596321,
+                'upload_date': '20140520',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true',
+            'info_dict': {
+                'id': '266636',
+                'ext': 'flv',
+                'title': 'Den sista dokusåpan S01E08',
+                'description': 'md5:295be39c872520221b933830f660b110',
+                'duration': 1492,
+                'timestamp': 1330522854,
+                'upload_date': '20120229',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true',
+            'info_dict': {
+                'id': '282756',
+                'ext': 'flv',
+                'title': 'Antikjakten S01E10',
+                'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8',
+                'duration': 2646,
+                'timestamp': 1348575868,
+                'upload_date': '20120925',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true',
+            'info_dict': {
+                'id': '230898',
+                'ext': 'flv',
+                'title': 'Anna Anka søker assistent - Ep. 8',
+                'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474',
+                'duration': 2656,
+                'timestamp': 1277720005,
+                'upload_date': '20100628',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true',
+            'info_dict': {
+                'id': '21873',
+                'ext': 'flv',
+                'title': 'Budbringerne program 10',
+                'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d',
+                'duration': 1297,
+                'timestamp': 1254205102,
+                'upload_date': '20090929',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true',
+            'info_dict': {
+                'id': '361883',
+                'ext': 'flv',
+                'title': 'Hotelinspektør Alex Polizzi - Ep. 10',
+                'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81',
+                'duration': 2594,
+                'timestamp': 1393236292,
+                'upload_date': '20140224',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -49,7 +192,7 @@ class TVPlayIE(InfoExtractor):
         quality = qualities(['hls', 'medium', 'high'])
         formats = []
         for format_id, video_url in streams['streams'].items():
-            if not video_url:
+            if not video_url or not isinstance(video_url, compat_str):
                 continue
             fmt = {
                 'format_id': format_id,
index 426369c51bb524b41c63b8377b6ce3f489d0d4b7..2d23effccdff0ba49ff628ded1f72d044fe609d6 100644 (file)
@@ -11,22 +11,48 @@ from ..utils import (
 
 class VpornIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
-        'md5': 'facf37c1b86546fa0208058546842c55',
-        'info_dict': {
-            'id': '497944',
-            'display_id': 'violet-on-her-th-birthday',
-            'ext': 'mp4',
-            'title': 'Violet on her 19th birthday',
-            'description': 'Violet dances in front of the camera which is sure to get you horny.',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'uploader': 'kileyGrope',
-            'categories': ['Masturbation', 'Teen'],
-            'duration': 393,
-            'age_limit': 18,
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
+            'md5': 'facf37c1b86546fa0208058546842c55',
+            'info_dict': {
+                'id': '497944',
+                'display_id': 'violet-on-her-th-birthday',
+                'ext': 'mp4',
+                'title': 'Violet on her 19th birthday',
+                'description': 'Violet dances in front of the camera which is sure to get you horny.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'kileyGrope',
+                'categories': ['Masturbation', 'Teen'],
+                'duration': 393,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+        {
+            'url': 'http://www.vporn.com/female/hana-shower/523564/',
+            'md5': 'ced35a4656198a1664cf2cda1575a25f',
+            'info_dict': {
+                'id': '523564',
+                'display_id': 'hana-shower',
+                'ext': 'mp4',
+                'title': 'Hana Shower',
+                'description': 'Hana showers at the bathroom.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'Hmmmmm',
+                'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
+                'duration': 588,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -64,7 +90,7 @@ class VpornIE(InfoExtractor):
 
         formats = []
 
-        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"([^"]+)"', webpage):
+        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
             video_url = video[1]
             fmt = {
                 'url': video_url,
index 00b6d1eba33a6686319d47846c34476ce8b387c7..4e8fbde8d6bbb072e7fc3475288c6c2e93360993 100644 (file)
@@ -18,7 +18,6 @@ class XHamsterIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
-            'md5': '8281348b8d3c53d39fffb377d24eac4e',
             'info_dict': {
                 'id': '1509445',
                 'ext': 'mp4',
@@ -31,7 +30,6 @@ class XHamsterIE(InfoExtractor):
         },
         {
             'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
-            'md5': '4cbd8d56708ecb4fb4124c23e4acb81a',
             'info_dict': {
                 'id': '2221348',
                 'ext': 'mp4',
index fcb5ff758deae198614e821dc132871e5fb90679..b86331e3cfa39ec8d3f287e829900b414892beee 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class YouJizzIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
+    _VALID_URL = r'^https?://(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
     _TEST = {
         'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
         'file': '2189178.flv',
index d456c4da522d689ac7bcbd33c5f8a3b1204c3b00..7bfda45e76e0d4ca3b6bfd6c3a8ec9f38de453e1 100644 (file)
@@ -23,7 +23,6 @@ class YouPornIE(InfoExtractor):
     _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
     _TEST = {
         'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
-        'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
         'info_dict': {
             'id': '505835',
             'ext': 'mp4',
index 70f670682e837745da6b181cc4d06e2970d78e89..b54c69122afb1265acc545cd79daa8ffd09a1752 100644 (file)
@@ -1,5 +1,8 @@
 # coding: utf-8
 
+from __future__ import unicode_literals
+
+
 import itertools
 import json
 import os.path
@@ -69,29 +72,29 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             return
 
         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
-                                  login_page, u'Login GALX parameter')
+                                  login_page, 'Login GALX parameter')
 
         # Log in
         login_form_strs = {
-                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
-                u'Email': username,
-                u'GALX': galx,
-                u'Passwd': password,
-
-                u'PersistentCookie': u'yes',
-                u'_utf8': u'霱',
-                u'bgresponse': u'js_disabled',
-                u'checkConnection': u'',
-                u'checkedDomains': u'youtube',
-                u'dnConn': u'',
-                u'pstMsg': u'0',
-                u'rmShown': u'1',
-                u'secTok': u'',
-                u'signIn': u'Sign in',
-                u'timeStmp': u'',
-                u'service': u'youtube',
-                u'uilel': u'3',
-                u'hl': u'en_US',
+                'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+                'Email': username,
+                'GALX': galx,
+                'Passwd': password,
+
+                'PersistentCookie': 'yes',
+                '_utf8': '霱',
+                'bgresponse': 'js_disabled',
+                'checkConnection': '',
+                'checkedDomains': 'youtube',
+                'dnConn': '',
+                'pstMsg': '0',
+                'rmShown': '1',
+                'secTok': '',
+                'signIn': 'Sign in',
+                'timeStmp': '',
+                'service': 'youtube',
+                'uilel': '3',
+                'hl': 'en_US',
         }
 
         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
@@ -132,19 +135,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             timeStmp = match.group(1)
 
             tfa_form_strs = {
-                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
-                u'smsToken': u'',
-                u'smsUserPin': tfa_code,
-                u'smsVerifyPin': u'Verify',
-
-                u'PersistentCookie': u'yes',
-                u'checkConnection': u'',
-                u'checkedDomains': u'youtube',
-                u'pstMsg': u'1',
-                u'secTok': secTok,
-                u'timeStmp': timeStmp,
-                u'service': u'youtube',
-                u'hl': u'en_US',
+                'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+                'smsToken': '',
+                'smsUserPin': tfa_code,
+                'smsVerifyPin': 'Verify',
+
+                'PersistentCookie': 'yes',
+                'checkConnection': '',
+                'checkedDomains': 'youtube',
+                'pstMsg': '1',
+                'secTok': secTok,
+                'timeStmp': timeStmp,
+                'service': 'youtube',
+                'hl': 'en_US',
             }
             tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items())
             tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
@@ -196,10 +199,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
 
 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
-    IE_DESC = u'YouTube.com'
+    IE_DESC = 'YouTube.com'
     _VALID_URL = r"""(?x)^
                      (
-                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
+                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                             (?:www\.)?deturl\.com/www\.youtube\.com/|
                             (?:www\.)?pwnyoutube\.com/|
@@ -217,10 +220,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                              )
                          ))
                          |youtu\.be/                                          # just youtu.be/xxxx
-                         |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
+                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                          )
                      )?                                                       # all until now is optional -> you can pass the naked ID
                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
+                     (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
                      (?(1).+)?                                                # if we found the ID, everything can follow
                      $"""
     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
@@ -300,7 +304,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '_rtmp': {'protocol': 'rtmp'},
     }
 
-    IE_NAME = u'youtube'
+    IE_NAME = 'youtube'
     _TESTS = [
         {
             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
@@ -359,7 +363,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             u"info_dict": {
                 u"upload_date": "20121002",
                 u"uploader_id": "8KVIDEO",
-                u"description": "No description available.",
+                u"description": '',
                 u"uploader": "8KVIDEO",
                 u"title": "UHDTV TEST 8K VIDEO.mp4"
             },
@@ -370,30 +374,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         },
         # DASH manifest with encrypted signature
         {
-            u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
-            u'info_dict': {
-                u'id': u'IB3lcPjvWLA',
-                u'ext': u'm4a',
-                u'title': u'Afrojack - The Spark ft. Spree Wilson',
-                u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
-                u'uploader': u'AfrojackVEVO',
-                u'uploader_id': u'AfrojackVEVO',
-                u'upload_date': u'20131011',
+            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
+            'info_dict': {
+                'id': 'IB3lcPjvWLA',
+                'ext': 'm4a',
+                'title': 'Afrojack - The Spark ft. Spree Wilson',
+                'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8',
+                'uploader': 'AfrojackVEVO',
+                'uploader_id': 'AfrojackVEVO',
+                'upload_date': '20131011',
             },
             u"params": {
-                u'youtube_include_dash_manifest': True,
-                u'format': '141',
+                'youtube_include_dash_manifest': True,
+                'format': '141',
             },
         },
     ]
 
-
-    @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
-        if YoutubePlaylistIE.suitable(url): return False
-        return re.match(cls._VALID_URL, url) is not None
-
     def __init__(self, *args, **kwargs):
         super(YoutubeIE, self).__init__(*args, **kwargs)
         self._player_cache = {}
@@ -416,7 +413,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _signature_cache_id(self, example_sig):
         """ Return a string representation of a signature """
-        return u'.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+        return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
@@ -434,7 +431,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id)
         if cache_spec is not None:
-            return lambda s: u''.join(s[i] for i in cache_spec)
+            return lambda s: ''.join(s[i] for i in cache_spec)
 
         if player_type == 'js':
             code = self._download_webpage(
@@ -453,7 +450,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             assert False, 'Invalid player type %r' % player_type
 
         if cache_spec is None:
-            test_string = u''.join(map(compat_chr, range(len(example_sig))))
+            test_string = ''.join(map(compat_chr, range(len(example_sig))))
             cache_res = res(test_string)
             cache_spec = [ord(c) for c in cache_res]
 
@@ -463,10 +460,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
     def _print_sig_code(self, func, example_sig):
         def gen_sig_code(idxs):
             def _genslice(start, end, step):
-                starts = u'' if start == 0 else str(start)
-                ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
-                steps = u'' if step == 1 else (u':%d' % step)
-                return u's[%s%s%s]' % (starts, ends, steps)
+                starts = '' if start == 0 else str(start)
+                ends = (u':%d' % (end+step)) if end + step >= 0 else ':'
+                steps = '' if step == 1 else (u':%d' % step)
+                return 's[%s%s%s]' % (starts, ends, steps)
 
             step = None
             start = '(Never used)'  # Quelch pyflakes warnings - start will be
@@ -483,26 +480,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     start = prev
                     continue
                 else:
-                    yield u's[%d]' % prev
+                    yield 's[%d]' % prev
             if step is None:
-                yield u's[%d]' % i
+                yield 's[%d]' % i
             else:
                 yield _genslice(start, i, step)
 
-        test_string = u''.join(map(compat_chr, range(len(example_sig))))
+        test_string = ''.join(map(compat_chr, range(len(example_sig))))
         cache_res = func(test_string)
         cache_spec = [ord(c) for c in cache_res]
-        expr_code = u' + '.join(gen_sig_code(cache_spec))
+        expr_code = ' + '.join(gen_sig_code(cache_spec))
         signature_id_tuple = '(%s)' % (
             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
         code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
-                u'    return %s\n') % (signature_id_tuple, expr_code)
+                '    return %s\n') % (signature_id_tuple, expr_code)
         self.to_screen(u'Extracted signature function:\n' + code)
 
     def _parse_sig_js(self, jscode):
         funcname = self._search_regex(
             r'signature=([$a-zA-Z]+)', jscode,
-             u'Initial JS player signature function name')
+             'Initial JS player signature function name')
 
         jsi = JSInterpreter(jscode)
         initial_function = jsi.extract_function(funcname)
@@ -510,9 +507,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _parse_sig_swf(self, file_contents):
         swfi = SWFInterpreter(file_contents)
-        TARGET_CLASSNAME = u'SignatureDecipher'
+        TARGET_CLASSNAME = 'SignatureDecipher'
         searched_class = swfi.extract_class(TARGET_CLASSNAME)
-        initial_function = swfi.extract_function(searched_class, u'decipher')
+        initial_function = swfi.extract_function(searched_class, 'decipher')
         return lambda s: initial_function([s])
 
     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
@@ -522,7 +519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             raise ExtractorError(u'Cannot decrypt signature without player_url')
 
         if player_url.startswith(u'//'):
-            player_url = u'https:' + player_url
+            player_url = 'https:' + player_url
         try:
             player_id = (player_url, self._signature_cache_id(s))
             if player_id not in self._player_cache:
@@ -537,7 +534,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         except Exception as e:
             tb = traceback.format_exc()
             raise ExtractorError(
-                u'Signature extraction failed: ' + tb, cause=e)
+                'Signature extraction failed: ' + tb, cause=e)
 
     def _get_available_subtitles(self, video_id, webpage):
         try:
@@ -560,7 +557,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
                 'name': unescapeHTML(l[0]).encode('utf-8'),
             })
-            url = u'https://www.youtube.com/api/timedtext?' + params
+            url = 'https://www.youtube.com/api/timedtext?' + params
             sub_lang_list[lang] = url
         if not sub_lang_list:
             self._downloader.report_warning(u'video doesn\'t have subtitles')
@@ -573,7 +570,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         sub_format = self._downloader.params.get('subtitlesformat', 'srt')
         self.to_screen(u'%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
-        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
+        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
         if mobj is None:
             self._downloader.report_warning(err_msg)
             return {}
@@ -629,7 +626,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             urls = filter(lambda l: l and not l.startswith('#'),
                             lines)
             return urls
-        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
+        manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
         formats_urls = _get_urls(manifest)
         for format_url in formats_urls:
             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
@@ -642,8 +639,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _real_extract(self, url):
         proto = (
-            u'http' if self._downloader.params.get('prefer_insecure', False)
-            else u'https')
+            'http' if self._downloader.params.get('prefer_insecure', False)
+            else 'https')
 
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         mobj = re.search(self._NEXT_URL_RE, url)
@@ -694,11 +691,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         if 'token' not in video_info:
             if 'reason' in video_info:
                 raise ExtractorError(
-                    u'YouTube said: %s' % video_info['reason'][0],
+                    'YouTube said: %s' % video_info['reason'][0],
                     expected=True, video_id=video_id)
             else:
                 raise ExtractorError(
-                    u'"token" parameter not in video info for unknown reason',
+                    '"token" parameter not in video info for unknown reason',
                     video_id=video_id)
 
         if 'view_count' in video_info:
@@ -731,7 +728,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             video_title = video_info['title'][0]
         else:
             self._downloader.report_warning(u'Unable to extract video title')
-            video_title = u'_'
+            video_title = '_'
 
         # thumbnail image
         # We try first to get a high quality image:
@@ -785,7 +782,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             if fd_mobj:
                 video_description = unescapeHTML(fd_mobj.group(1))
             else:
-                video_description = u''
+                video_description = ''
 
         def _extract_count(count_name):
             count = self._search_regex(
@@ -832,7 +829,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             if m_s is not None:
                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
-            m_s = re_signature.search(args.get('adaptive_fmts', u''))
+            m_s = re_signature.search(args.get('adaptive_fmts', ''))
             if m_s is not None:
                 if 'adaptive_fmts' in video_info:
                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
@@ -882,12 +879,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     if not age_gate:
                         jsplayer_url_json = self._search_regex(
                             r'"assets":.+?"js":\s*("[^"]+")',
-                            video_webpage, u'JS player URL')
+                            video_webpage, 'JS player URL')
                         player_url = json.loads(jsplayer_url_json)
                     if player_url is None:
                         player_url_json = self._search_regex(
                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
-                            video_webpage, u'age gate player URL')
+                            video_webpage, 'age gate player URL')
                         player_url = json.loads(player_url_json)
 
                     if self._downloader.params.get('verbose'):
@@ -898,14 +895,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                             if player_url.endswith('swf'):
                                 player_version = self._search_regex(
                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
-                                    u'flash player', fatal=False)
+                                    'flash player', fatal=False)
                                 player_desc = 'flash player %s' % player_version
                             else:
                                 player_version = self._search_regex(
                                     r'html5player-([^/]+?)(?:/html5player)?\.js',
                                     player_url,
                                     'html5 player', fatal=False)
-                                player_desc = u'html5 player %s' % player_version
+                                player_desc = 'html5 player %s' % player_version
 
                         parts_sizes = self._signature_cache_id(encrypted_sig)
                         self.to_screen(u'{%s} signature length %s, %s' %
@@ -997,7 +994,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         }
 
 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
-    IE_DESC = u'YouTube.com playlists'
+    IE_DESC = 'YouTube.com playlists'
     _VALID_URL = r"""(?x)(?:
                         (?:https?://)?
                         (?:\w+\.)?
@@ -1019,7 +1016,47 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
     _MORE_PAGES_INDICATOR = r'data-link-type="next"'
     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
-    IE_NAME = u'youtube:playlist'
+    IE_NAME = 'youtube:playlist'
+    _TESTS = [{
+        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+        'info_dict': {
+            'title': 'ytdl test PL',
+        },
+        'playlist_count': 3,
+    }, {
+        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
+        'info_dict': {
+            'title': 'YDL_Empty_List',
+        },
+        'playlist_count': 0,
+    }, {
+        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
+        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+        'info_dict': {
+            'title': '29C3: Not my department',
+        },
+        'playlist_count': 95,
+    }, {
+        'note': 'issue #673',
+        'url': 'PLBB231211A4F62143',
+        'info_dict': {
+            'title': 'Team Fortress 2 (Class-based LP)',
+        },
+        'playlist_mincount': 26,
+    }, {
+        'note': 'Large playlist',
+        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
+        'info_dict': {
+            'title': 'Uploads from Cauchemar',
+        },
+        'playlist_mincount': 799,
+    }, {
+        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+        'info_dict': {
+            'title': 'YDL_safe_search',
+        },
+        'playlist_count': 2,
+    }]
 
     def _real_initialize(self):
         self._login()
@@ -1034,7 +1071,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
         webpage = self._download_webpage(
-            url, playlist_id, u'Downloading Youtube mix')
+            url, playlist_id, 'Downloading Youtube mix')
         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
         title_span = (
             search_title('playlist-title') or
@@ -1071,7 +1108,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
             return self._extract_mix(playlist_id)
         if playlist_id.startswith('TL'):
             raise ExtractorError(u'For downloading YouTube.com top lists, use '
-                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
+                'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
         url = self._TEMPLATE_URL % playlist_id
         page = self._download_webpage(url, playlist_id)
@@ -1080,7 +1117,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         # Check if the playlist exists or is private
         if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
             raise ExtractorError(
-                u'The playlist doesn\'t exist or is private, use --username or '
+                'The playlist doesn\'t exist or is private, use --username or '
                 '--netrc to access it.',
                 expected=True)
 
@@ -1107,17 +1144,18 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
 
         playlist_title = self._html_search_regex(
             r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
-            page, u'title')
+            page, 'title')
 
         url_results = self._ids_to_results(ids)
         return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
 class YoutubeTopListIE(YoutubePlaylistIE):
-    IE_NAME = u'youtube:toplist'
+    IE_NAME = 'youtube:toplist'
     IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
-        u' (Example: "yttoplist:music:Top Tracks")')
+        ' (Example: "yttoplist:music:Top Tracks")')
     _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
+    _TESTS = []
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -1126,7 +1164,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
         query = compat_urllib_parse.urlencode({'title': title})
         playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
         channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
-        link = self._html_search_regex(playlist_re, channel_page, u'list')
+        link = self._html_search_regex(playlist_re, channel_page, 'list')
         url = compat_urlparse.urljoin('https://www.youtube.com/', link)
         
         video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
@@ -1134,7 +1172,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
         # sometimes the webpage doesn't contain the videos
         # retry until we get them
         for i in itertools.count(0):
-            msg = u'Downloading Youtube mix'
+            msg = 'Downloading Youtube mix'
             if i > 0:
                 msg += ', retry #%d' % i
 
@@ -1147,11 +1185,11 @@ class YoutubeTopListIE(YoutubePlaylistIE):
 
 
 class YoutubeChannelIE(InfoExtractor):
-    IE_DESC = u'YouTube.com channels'
+    IE_DESC = 'YouTube.com channels'
     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
     _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
-    IE_NAME = u'youtube:channel'
+    IE_NAME = 'youtube:channel'
 
     def extract_videos_from_page(self, page):
         ids_in_page = []
@@ -1203,12 +1241,12 @@ class YoutubeChannelIE(InfoExtractor):
 
 
 class YoutubeUserIE(InfoExtractor):
-    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
+    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
-    IE_NAME = u'youtube:user'
+    IE_NAME = 'youtube:user'
 
     @classmethod
     def suitable(cls, url):
@@ -1237,7 +1275,7 @@ class YoutubeUserIE(InfoExtractor):
             gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
             page = self._download_webpage(
                 gdata_url, username,
-                u'Downloading video ids from %d to %d' % (
+                'Downloading video ids from %d to %d' % (
                     start_index, start_index + self._GDATA_PAGE_SIZE))
 
             try:
@@ -1265,10 +1303,10 @@ class YoutubeUserIE(InfoExtractor):
 
 
 class YoutubeSearchIE(SearchInfoExtractor):
-    IE_DESC = u'YouTube.com searches'
-    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
+    IE_DESC = 'YouTube.com searches'
+    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
     _MAX_RESULTS = 1000
-    IE_NAME = u'youtube:search'
+    IE_NAME = 'youtube:search'
     _SEARCH_KEY = 'ytsearch'
 
     def _get_n_results(self, query, n):
@@ -1292,7 +1330,7 @@ class YoutubeSearchIE(SearchInfoExtractor):
 
             if 'items' not in api_response:
                 raise ExtractorError(
-                    u'[youtube] No video results', expected=True)
+                    '[youtube] No video results', expected=True)
 
             new_ids = list(video['id'] for video in api_response['items'])
             video_ids += new_ids
@@ -1311,12 +1349,12 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
     _SEARCH_KEY = 'ytsearchdate'
-    IE_DESC = u'YouTube.com searches, newest videos first'
+    IE_DESC = 'YouTube.com searches, newest videos first'
 
 
 class YoutubeSearchURLIE(InfoExtractor):
-    IE_DESC = u'YouTube.com search URLs'
-    IE_NAME = u'youtube:search_url'
+    IE_DESC = 'YouTube.com search URLs'
+    IE_NAME = 'youtube:search_url'
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
 
     def _real_extract(self, url):
@@ -1325,7 +1363,7 @@ class YoutubeSearchURLIE(InfoExtractor):
 
         webpage = self._download_webpage(url, query)
         result_code = self._search_regex(
-            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')
+            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')
 
         part_codes = re.findall(
             r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
@@ -1351,14 +1389,14 @@ class YoutubeSearchURLIE(InfoExtractor):
 
 
 class YoutubeShowIE(InfoExtractor):
-    IE_DESC = u'YouTube.com (multi-season) shows'
+    IE_DESC = 'YouTube.com (multi-season) shows'
     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
-    IE_NAME = u'youtube:show'
+    IE_NAME = 'youtube:show'
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         show_name = mobj.group(1)
-        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
+        webpage = self._download_webpage(url, show_name, 'Downloading show webpage')
         # There's one playlist for each season of the show
         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
         self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
@@ -1384,7 +1422,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
 
     @property
     def IE_NAME(self):
-        return u'youtube:%s' % self._FEED_NAME
+        return 'youtube:%s' % self._FEED_NAME
 
     def _real_initialize(self):
         self._login()
@@ -1394,9 +1432,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
         paging = 0
         for i in itertools.count(1):
             info = self._download_json(self._FEED_TEMPLATE % paging,
-                                          u'%s feed' % self._FEED_NAME,
-                                          u'Downloading page %s' % i)
+                                          '%s feed' % self._FEED_NAME,
+                                          'Downloading page %s' % i)
             feed_html = info.get('feed_html') or info.get('content_html')
+            load_more_widget_html = info.get('load_more_widget_html') or feed_html
             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
             ids = orderedSet(m.group(1) for m in m_ids)
             feed_entries.extend(
@@ -1404,51 +1443,52 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
                 for video_id in ids)
             mobj = re.search(
                 r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
-                feed_html)
+                load_more_widget_html)
             if mobj is None:
                 break
             paging = mobj.group('paging')
         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
 
 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
+    IE_DESC = 'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
     _FEED_NAME = 'recommended'
-    _PLAYLIST_TITLE = u'Youtube Recommended videos'
+    _PLAYLIST_TITLE = 'Youtube Recommended videos'
 
 class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
+    IE_DESC = 'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
     _FEED_NAME = 'watch_later'
-    _PLAYLIST_TITLE = u'Youtube Watch Later'
+    _PLAYLIST_TITLE = 'Youtube Watch Later'
     _PERSONAL_FEED = True
 
 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
-    _VALID_URL = u'https?://www\.youtube\.com/feed/history|:ythistory'
+    IE_DESC = 'Youtube watch history, "ythistory" keyword (requires authentication)'
+    _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
     _FEED_NAME = 'history'
     _PERSONAL_FEED = True
-    _PLAYLIST_TITLE = u'Youtube Watch History'
+    _PLAYLIST_TITLE = 'Youtube Watch History'
 
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
-    IE_NAME = u'youtube:favorites'
-    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
+    IE_NAME = 'youtube:favorites'
+    IE_DESC = 'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
     _LOGIN_REQUIRED = True
 
     def _real_extract(self, url):
         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
-        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
+        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
         return self.url_result(playlist_id, 'YoutubePlaylist')
 
 
 class YoutubeSubscriptionsIE(YoutubePlaylistIE):
-    IE_NAME = u'youtube:subscriptions'
-    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
+    IE_NAME = 'youtube:subscriptions'
+    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+    _TESTS = []
 
     def _real_extract(self, url):
-        title = u'Youtube Subscriptions'
+        title = 'Youtube Subscriptions'
         page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)
 
         # The extraction process is the same as for playlists, but the regex
@@ -1500,9 +1540,9 @@ class YoutubeTruncatedURLIE(InfoExtractor):
 
     def _real_extract(self, url):
         raise ExtractorError(
-            u'Did you forget to quote the URL? Remember that & is a meta '
-            u'character in most shells, so you want to put the URL in quotes, '
-            u'like  youtube-dl '
-            u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
-            u' or simply  youtube-dl BaW_jenozKc  .',
+            'Did you forget to quote the URL? Remember that & is a meta '
+            'character in most shells, so you want to put the URL in quotes, '
+            'like  youtube-dl '
+            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
+            ' or simply  youtube-dl BaW_jenozKc  .',
             expected=True)
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
new file mode 100644 (file)
index 0000000..31baab4
--- /dev/null
@@ -0,0 +1,481 @@
+from __future__ import unicode_literals
+
+import os.path
+import optparse
+import shlex
+import sys
+
+from .utils import (
+    get_term_width,
+    write_string,
+)
+from .version import __version__
+
+
+def parseOpts(overrideArguments=None):
+    def _readOptions(filename_bytes, default=[]):
+        '''Parse a configuration file into a flat list of arguments.
+
+        Every line is tokenized with shlex (shell-like quoting, "#" comments
+        allowed) and the tokens of all lines are concatenated in file order.
+        If the file cannot be opened, default is returned unchanged.
+        '''
+        try:
+            conf_file = open(filename_bytes)
+        except IOError:
+            # A file that cannot be opened is silently skipped
+            return default
+        # Only the open() is guarded; errors while reading or tokenizing
+        # propagate to the caller, the file is closed either way.
+        with conf_file:
+            tokens = []
+            for line in conf_file:
+                tokens.extend(shlex.split(line, comments=True))
+        return tokens
+
+    def _readUserConf():
+        '''Return the arguments of the first per-user config file found.
+
+        Lookup order:
+          1. $XDG_CONFIG_HOME (or ~/.config if unset):
+             youtube-dl/config if that is a file, otherwise youtube-dl.conf
+          2. %appdata%/youtube-dl/config, then config.txt (only if the
+             "appdata" environment variable is set)
+          3. ~/youtube-dl.conf, then ~/youtube-dl.conf.txt
+
+        The first file that can be opened wins, even if it is empty.
+        An empty list is returned when none can be read.
+        '''
+        config_root = os.environ.get('XDG_CONFIG_HOME')
+        if not config_root:
+            config_root = os.path.join(os.path.expanduser('~'), '.config')
+
+        primary = os.path.join(config_root, 'youtube-dl', 'config')
+        if not os.path.isfile(primary):
+            primary = os.path.join(config_root, 'youtube-dl.conf')
+        candidates = [primary]
+
+        appdata_dir = os.environ.get('appdata')
+        if appdata_dir:
+            candidates.append(os.path.join(appdata_dir, 'youtube-dl', 'config'))
+            candidates.append(os.path.join(appdata_dir, 'youtube-dl', 'config.txt'))
+
+        home_dir = os.path.expanduser('~')
+        candidates.append(os.path.join(home_dir, 'youtube-dl.conf'))
+        candidates.append(os.path.join(home_dir, 'youtube-dl.conf.txt'))
+
+        for candidate in candidates:
+            # None (rather than the [] default) tells "file could not be
+            # opened" apart from "file is empty"
+            parsed = _readOptions(candidate, None)
+            if parsed is not None:
+                return parsed
+        return []
+
+    def _format_option_string(option):
+        '''Render an option for the help listing.
+
+        ('-o', '--option') -> -o, --option METAVAR
+
+        Only the first short and the first long spelling are shown. The
+        metavar is appended only for options that take a value.
+        '''
+
+        opts = []
+
+        if option._short_opts:
+            opts.append(option._short_opts[0])
+        if option._long_opts:
+            opts.append(option._long_opts[0])
+        if len(opts) > 1:
+            opts.insert(1, ', ')
+
+        if option.takes_value():
+            # Same fallback as optparse's own formatter: without it, an option
+            # declared without metavar is listed as "--option None".
+            metavar = option.metavar or option.dest.upper()
+            opts.append(' %s' % metavar)
+
+        return "".join(opts)
+
+    def _comma_separated_values_options_callback(option, opt_str, value, parser):
+        '''optparse callback: store the comma-separated value as a list.
+
+        The list is written to the attribute named by option.dest on
+        parser.values; opt_str is unused.
+        '''
+        items = value.split(',')
+        setattr(parser.values, option.dest, items)
+
+    def _hide_login_info(opts):
+        '''Return a copy of opts with credential values masked.
+
+        The argument following any of the username / password flags is
+        replaced by '<PRIVATE>', and the value of a '--flag=VALUE' spelling
+        is replaced the same way. Every occurrence is masked, and a flag
+        that is the last argument (no value follows) is left as-is instead
+        of raising IndexError. The input sequence is not modified.
+        '''
+        private_opts = ('-p', '--password', '-u', '--username', '--video-password')
+        original = list(opts)
+        scrubbed = list(original)
+        # Decide from the original arguments, so that a flag that ends up in
+        # value position (e.g. "-u -p secret") still masks what follows it.
+        for idx, opt in enumerate(original):
+            if opt in private_opts:
+                if idx + 1 < len(original):
+                    scrubbed[idx + 1] = '<PRIVATE>'
+                continue
+            key, sep, _ = opt.partition('=')
+            if sep and key.startswith('--') and key in private_opts:
+                scrubbed[idx] = key + '=<PRIVATE>'
+        return scrubbed
+
+    max_width = 80
+    max_help_position = 80
+
+    # No need to wrap help messages if we're on a wide console
+    columns = get_term_width()
+    if columns: max_width = columns
+
+    fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
+    fmt.format_option_strings = _format_option_string
+
+    kw = {
+        'version'   : __version__,
+        'formatter' : fmt,
+        'usage' : '%prog [options] url [url...]',
+        'conflict_handler' : 'resolve',
+    }
+
+    parser = optparse.OptionParser(**kw)
+
+    # option groups
+    general        = optparse.OptionGroup(parser, 'General Options')
+    selection      = optparse.OptionGroup(parser, 'Video Selection')
+    authentication = optparse.OptionGroup(parser, 'Authentication Options')
+    video_format   = optparse.OptionGroup(parser, 'Video Format Options')
+    subtitles      = optparse.OptionGroup(parser, 'Subtitle Options')
+    downloader     = optparse.OptionGroup(parser, 'Download Options')
+    postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
+    filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
+    workarounds    = optparse.OptionGroup(parser, 'Workarounds')
+    verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
+
+    general.add_option('-h', '--help',
+            action='help', help='print this help text and exit')
+    general.add_option('-v', '--version',
+            action='version', help='print program version and exit')
+    general.add_option('-U', '--update',
+            action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
+    general.add_option('-i', '--ignore-errors',
+            action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
+    general.add_option('--abort-on-error',
+            action='store_false', dest='ignoreerrors',
+            help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
+    general.add_option('--dump-user-agent',
+            action='store_true', dest='dump_user_agent',
+            help='display the current browser identification', default=False)
+    general.add_option('--list-extractors',
+            action='store_true', dest='list_extractors',
+            help='List all supported extractors and the URLs they would handle', default=False)
+    general.add_option('--extractor-descriptions',
+            action='store_true', dest='list_extractor_descriptions',
+            help='Output descriptions of all supported extractors', default=False)
+    general.add_option(
+        '--proxy', dest='proxy', default=None, metavar='URL',
+        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
+    general.add_option(
+        '--socket-timeout', dest='socket_timeout',
+        type=float, default=None, help=u'Time to wait before giving up, in seconds')
+    general.add_option(
+        '--default-search',
+        dest='default_search', metavar='PREFIX',
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
+    general.add_option(
+        '--ignore-config',
+        action='store_true',
+        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+
+    selection.add_option(
+        '--playlist-start',
+        dest='playliststart', metavar='NUMBER', default=1, type=int,
+        help='playlist video to start at (default is %default)')
+    selection.add_option(
+        '--playlist-end',
+        dest='playlistend', metavar='NUMBER', default=None, type=int,
+        help='playlist video to end at (default is last)')
+    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
+    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
+    selection.add_option('--max-downloads', metavar='NUMBER',
+                         dest='max_downloads', type=int, default=None,
+                         help='Abort after downloading NUMBER files')
+    selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
+    selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
+    selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
+    selection.add_option(
+        '--datebefore', metavar='DATE', dest='datebefore', default=None,
+        help='download only videos uploaded on or before this date (i.e. inclusive)')
+    selection.add_option(
+        '--dateafter', metavar='DATE', dest='dateafter', default=None,
+        help='download only videos uploaded on or after this date (i.e. inclusive)')
+    selection.add_option(
+        '--min-views', metavar='COUNT', dest='min_views',
+        default=None, type=int,
+        help="Do not download any videos with less than COUNT views",)
+    selection.add_option(
+        '--max-views', metavar='COUNT', dest='max_views',
+        default=None, type=int,
+        help="Do not download any videos with more than COUNT views",)
+    selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
+    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
+                         help='download only videos suitable for the given age',
+                         default=None, type=int)
+    selection.add_option('--download-archive', metavar='FILE',
+                         dest='download_archive',
+                         help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
+    selection.add_option(
+        '--include-ads', dest='include_ads',
+        action='store_true',
+        help='Download advertisements as well (experimental)')
+    selection.add_option(
+        '--youtube-include-dash-manifest', action='store_true',
+        dest='youtube_include_dash_manifest', default=False,
+        help='Try to download the DASH manifest on YouTube videos (experimental)')
+
+    authentication.add_option('-u', '--username',
+            dest='username', metavar='USERNAME', help='account username')
+    authentication.add_option('-p', '--password',
+            dest='password', metavar='PASSWORD', help='account password')
+    authentication.add_option('-2', '--twofactor',
+            dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code')
+    authentication.add_option('-n', '--netrc',
+            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
+    authentication.add_option('--video-password',
+            dest='videopassword', metavar='PASSWORD', help='video password (vimeo, smotri)')
+
+
+    video_format.add_option('-f', '--format',
+            action='store', dest='format', metavar='FORMAT', default=None,
+            help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.')
+    video_format.add_option('--all-formats',
+            action='store_const', dest='format', help='download all available video formats', const='all')
+    video_format.add_option('--prefer-free-formats',
+            action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
+    video_format.add_option('--max-quality',
+            action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
+    video_format.add_option('-F', '--list-formats',
+            action='store_true', dest='listformats', help='list all available formats')
+
+    subtitles.add_option('--write-sub', '--write-srt',
+            action='store_true', dest='writesubtitles',
+            help='write subtitle file', default=False)
+    subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
+            action='store_true', dest='writeautomaticsub',
+            help='write automatic subtitle file (youtube only)', default=False)
+    subtitles.add_option('--all-subs',
+            action='store_true', dest='allsubtitles',
+            help='downloads all the available subtitles of the video', default=False)
+    subtitles.add_option('--list-subs',
+            action='store_true', dest='listsubtitles',
+            help='lists all available subtitles for the video', default=False)
+    subtitles.add_option('--sub-format',
+            action='store', dest='subtitlesformat', metavar='FORMAT',
+            help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
+    subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
+            action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
+            default=[], callback=_comma_separated_values_options_callback,
+            help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
+
+    downloader.add_option('-r', '--rate-limit',
+            dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
+    downloader.add_option('-R', '--retries',
+            dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
+    downloader.add_option('--buffer-size',
+            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024")
+    downloader.add_option('--no-resize-buffer',
+            action='store_true', dest='noresizebuffer',
+            help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
+    downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
+
+    workarounds.add_option(
+        '--encoding', dest='encoding', metavar='ENCODING',
+        help='Force the specified encoding (experimental)')
+    workarounds.add_option(
+        '--no-check-certificate', action='store_true',
+        dest='no_check_certificate', default=False,
+        help='Suppress HTTPS certificate validation.')
+    workarounds.add_option(
+        '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
+        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
+    workarounds.add_option(
+        '--user-agent', metavar='UA',
+        dest='user_agent', help='specify a custom user agent')
+    workarounds.add_option(
+        '--referer', metavar='REF',
+        dest='referer', default=None,
+        help='specify a custom referer, use if the video access is restricted to one domain',
+    )
+    workarounds.add_option(
+        '--add-header', metavar='FIELD:VALUE',
+        dest='headers', action='append',
+        help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+    )
+    workarounds.add_option(
+        '--bidi-workaround', dest='bidi_workaround', action='store_true',
+        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
+
+    verbosity.add_option('-q', '--quiet',
+            action='store_true', dest='quiet', help='activates quiet mode', default=False)
+    verbosity.add_option(
+        '--no-warnings',
+        dest='no_warnings', action='store_true', default=False,
+        help='Ignore warnings')
+    verbosity.add_option('-s', '--simulate',
+            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
+    verbosity.add_option('--skip-download',
+            action='store_true', dest='skip_download', help='do not download the video', default=False)
+    verbosity.add_option('-g', '--get-url',
+            action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
+    verbosity.add_option('-e', '--get-title',
+            action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
+    verbosity.add_option('--get-id',
+            action='store_true', dest='getid', help='simulate, quiet but print id', default=False)
+    verbosity.add_option('--get-thumbnail',
+            action='store_true', dest='getthumbnail',
+            help='simulate, quiet but print thumbnail URL', default=False)
+    verbosity.add_option('--get-description',
+            action='store_true', dest='getdescription',
+            help='simulate, quiet but print video description', default=False)
+    verbosity.add_option('--get-duration',
+            action='store_true', dest='getduration',
+            help='simulate, quiet but print video length', default=False)
+    verbosity.add_option('--get-filename',
+            action='store_true', dest='getfilename',
+            help='simulate, quiet but print output filename', default=False)
+    verbosity.add_option('--get-format',
+            action='store_true', dest='getformat',
+            help='simulate, quiet but print output format', default=False)
+    verbosity.add_option('-j', '--dump-json',
+            action='store_true', dest='dumpjson',
+            help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False)
+    verbosity.add_option('--newline',
+            action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
+    verbosity.add_option('--no-progress',
+            action='store_true', dest='noprogress', help='do not print progress bar', default=False)
+    verbosity.add_option('--console-title',
+            action='store_true', dest='consoletitle',
+            help='display progress in console titlebar', default=False)
+    verbosity.add_option('-v', '--verbose',
+            action='store_true', dest='verbose', help='print various debugging information', default=False)
+    verbosity.add_option('--dump-intermediate-pages',
+            action='store_true', dest='dump_intermediate_pages', default=False,
+            help='print downloaded pages to debug problems (very verbose)')
+    verbosity.add_option('--write-pages',
+            action='store_true', dest='write_pages', default=False,
+            help='Write downloaded intermediary pages to files in the current directory to debug problems')
+    verbosity.add_option('--youtube-print-sig-code',
+            action='store_true', dest='youtube_print_sig_code', default=False,
+            help=optparse.SUPPRESS_HELP)
+    verbosity.add_option('--print-traffic',
+            dest='debug_printtraffic', action='store_true', default=False,
+            help='Display sent and read HTTP traffic')
+
+
+    filesystem.add_option('-a', '--batch-file',
+            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
+    filesystem.add_option('--id',
+            action='store_true', dest='useid', help='use only video ID in file name', default=False)
+    filesystem.add_option('-A', '--auto-number',
+            action='store_true', dest='autonumber',
+            help='number downloaded files starting from 00000', default=False)
+    filesystem.add_option('-o', '--output',
+            dest='outtmpl', metavar='TEMPLATE',
+            help=('output filename template. Use %(title)s to get the title, '
+                  '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
+                  '%(autonumber)s to get an automatically incremented number, '
+                  '%(ext)s for the filename extension, '
+                  '%(format)s for the format description (like "22 - 1280x720" or "HD"), '
+                  '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
+                  '%(upload_date)s for the upload date (YYYYMMDD), '
+                  '%(extractor)s for the provider (youtube, metacafe, etc), '
+                  '%(id)s for the video id, %(playlist)s for the playlist the video is in, '
+                  '%(playlist_index)s for the position in the playlist and %% for a literal percent. '
+                  '%(height)s and %(width)s for the width and height of the video format. '
+                  '%(resolution)s for a textual description of the resolution of the video format. '
+                  'Use - to output to stdout. Can also be used to download to a different directory, '
+                  'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
+    filesystem.add_option('--autonumber-size',
+            dest='autonumber_size', metavar='NUMBER',
+            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
+    filesystem.add_option('--restrict-filenames',
+            action='store_true', dest='restrictfilenames',
+            help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
+    filesystem.add_option('-t', '--title',
+            action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False)
+    filesystem.add_option('-l', '--literal',
+            action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
+    filesystem.add_option('-w', '--no-overwrites',
+            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
+    filesystem.add_option('-c', '--continue',
+            action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
+    filesystem.add_option('--no-continue',
+            action='store_false', dest='continue_dl',
+            help='do not resume partially downloaded files (restart from beginning)')
+    filesystem.add_option('--no-part',
+            action='store_true', dest='nopart', help='do not use .part files', default=False)
+    filesystem.add_option('--no-mtime',
+            action='store_false', dest='updatetime',
+            help='do not use the Last-modified header to set the file modification time', default=True)
+    filesystem.add_option('--write-description',
+            action='store_true', dest='writedescription',
+            help='write video description to a .description file', default=False)
+    filesystem.add_option('--write-info-json',
+            action='store_true', dest='writeinfojson',
+            help='write video metadata to a .info.json file', default=False)
+    filesystem.add_option('--write-annotations',
+            action='store_true', dest='writeannotations',
+            help='write video annotations to a .annotation file', default=False)
+    filesystem.add_option('--write-thumbnail',
+            action='store_true', dest='writethumbnail',
+            help='write thumbnail image to disk', default=False)
+    filesystem.add_option('--load-info',
+            dest='load_info_filename', metavar='FILE',
+            help='json file containing the video information (created with the "--write-json" option)')
+    filesystem.add_option('--cookies',
+            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
+    filesystem.add_option(
+        '--cache-dir', dest='cachedir', default=None, metavar='DIR',
+        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
+    filesystem.add_option(
+        '--no-cache-dir', action='store_const', const=False, dest='cachedir',
+        help='Disable filesystem caching')
+    filesystem.add_option(
+        '--rm-cache-dir', action='store_true', dest='rm_cachedir',
+        help='Delete all filesystem cache files')
+
+
+    postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
+            help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
+    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
+            help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default')
+    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
+            help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
+    postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
+            help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)')
+    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
+            help='keeps the video file on disk after the post-processing; the video is erased by default')
+    postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
+            help='do not overwrite post-processed files; the post-processed files are overwritten by default')
+    postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
+            help='embed subtitles in the video (only for mp4 videos)')
+    postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
+            help='embed thumbnail in the audio as cover art')
+    postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
+            help='write metadata to the video file')
+    postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
+            help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+    postproc.add_option('--prefer-avconv', action='store_false', dest='prefer_ffmpeg',
+        help='Prefer avconv over ffmpeg for running the postprocessors (default)')
+    postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg',
+        help='Prefer ffmpeg over avconv for running the postprocessors')
+    postproc.add_option(
+        '--exec', metavar='CMD', dest='exec_cmd',
+        help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )
+
+    parser.add_option_group(general)
+    parser.add_option_group(selection)
+    parser.add_option_group(downloader)
+    parser.add_option_group(filesystem)
+    parser.add_option_group(verbosity)
+    parser.add_option_group(workarounds)
+    parser.add_option_group(video_format)
+    parser.add_option_group(subtitles)
+    parser.add_option_group(authentication)
+    parser.add_option_group(postproc)
+
+    if overrideArguments is not None:
+        opts, args = parser.parse_args(overrideArguments)
+        if opts.verbose:
+            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
+    else:
+        commandLineConf = sys.argv[1:]
+        if '--ignore-config' in commandLineConf:
+            systemConf = []
+            userConf = []
+        else:
+            systemConf = _readOptions('/etc/youtube-dl.conf')
+            if '--ignore-config' in systemConf:
+                userConf = []
+            else:
+                userConf = _readUserConf()
+        argv = systemConf + userConf + commandLineConf
+
+        opts, args = parser.parse_args(argv)
+        if opts.verbose:
+            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+
+    return parser, opts, args
index 0bc410e91b01d08bd01f3b8fe2315b4b2e2b97e0..b644f4e920bf0353658ec9920abdb0541dbaf0e2 100644 (file)
@@ -280,6 +280,11 @@ if sys.version_info >= (2, 7):
         return node.find(expr)
 else:
     def find_xpath_attr(node, xpath, key, val):
+        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
+        # .//node does not match if a node is a direct child of . !
+        if isinstance(xpath, unicode):
+            xpath = xpath.encode('ascii')
+
         for f in node.findall(xpath):
             if f.attrib.get(key) == val:
                 return f
@@ -299,6 +304,20 @@ def xpath_with_ns(path, ns_map):
     return '/'.join(replaced)
 
 
+def xpath_text(node, xpath, name=None, fatal=False):
+    """Return the text of the element that node.find(xpath) locates.
+    If nothing matches: return None, or raise ExtractorError (citing name, else xpath) when fatal is set."""
+    if sys.version_info < (2, 7):  # 2.6 mishandles unicode xpaths, so pass a byte string
+        xpath = xpath.encode('ascii')
+    element = node.find(xpath)
+    if element is not None:
+        return element.text
+    if not fatal:
+        return None
+    label = xpath if name is None else name
+    raise ExtractorError('Could not find XML element %s' % label)
+
+
 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
 class BaseHTMLParser(compat_html_parser.HTMLParser):
     def __init(self):
@@ -617,7 +636,7 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
                     self.sock = sock
                     self._tunnel()
                 try:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
+                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
                 except ssl.SSLError:
                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
 
@@ -625,8 +644,14 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
             def https_open(self, req):
                 return self.do_open(HTTPSConnectionV3, req)
         return HTTPSHandlerV3(**kwargs)
-    else:
-        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
+    elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
+        context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
+        context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
+        if opts_no_check_certificate:
+            context.verify_mode = ssl.CERT_NONE
+        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+    else:  # Python < 3.4
+        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
         context.verify_mode = (ssl.CERT_NONE
                                if opts_no_check_certificate
                                else ssl.CERT_REQUIRED)
@@ -1412,6 +1437,24 @@ def uppercase_escape(s):
         lambda m: unicode_escape(m.group(0))[0],
         s)
 
+
+def escape_rfc3986(s):
+    """Percent-escape s with quote(), keeping '%', '~' and the RFC 3986 reserved characters as they are"""
+    SAFE_CHARS = "%/;:@&=+$,!~*'()?#[]"
+    py2_unicode = sys.version_info < (3, 0) and isinstance(s, unicode)  # short-circuits on Python 3
+    return compat_urllib_parse.quote(s.encode('utf-8') if py2_unicode else s, SAFE_CHARS)
+
+
+def escape_url(url):
+    """Return url with its path, params, query and fragment passed through escape_rfc3986"""
+    parts = compat_urllib_parse_urlparse(url)
+    # Components are escaped in this fixed order; everything else in the parsed URL is kept as is
+    fields = ('path', 'params', 'query', 'fragment')
+    escaped = dict(
+        (field, escape_rfc3986(getattr(parts, field)))
+        for field in fields)
+    return parts._replace(**escaped).geturl()
+
 try:
     struct.pack(u'!I', 0)
 except TypeError:
@@ -1546,3 +1589,13 @@ except AttributeError:
         if ret:
             raise subprocess.CalledProcessError(ret, p.args, output=output)
         return output
+
+
+def limit_length(s, length):
+    """ Truncate s to at most length characters, marking the cut with '...' (None is passed through) """
+    if s is None or len(s) <= length:
+        return s
+    ELLIPSES = '...'
+    if length <= len(ELLIPSES):  # no room for text: a negative slice bound would overshoot the limit
+        return ELLIPSES[:max(length, 0)]
+    return s[:length - len(ELLIPSES)] + ELLIPSES
index ecc2e0f53c8424f9a0bec0a1504f0252baf24460..cf0d862da60105fe1f76efc2fecf7106e2fdeb03 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.09.06'
+__version__ = '2014.09.15.1'