Merge commit '98703c7fbfcf06348220aa63f9422cdd792cfe1a'
authorPhilipp Hagemeister <phihag@phihag.de>
Mon, 15 Sep 2014 21:26:54 +0000 (23:26 +0200)
committerPhilipp Hagemeister <phihag@phihag.de>
Mon, 15 Sep 2014 21:26:54 +0000 (23:26 +0200)
119 files changed:
.gitignore
LATEST_VERSION [deleted file]
MANIFEST.in
Makefile
README.md
devscripts/fish-completion.in [new file with mode: 0644]
devscripts/fish-completion.py [new file with mode: 0755]
devscripts/release.sh
setup.py
test/helper.py
test/parameters.json
test/test_all_urls.py
test/test_cache.py [new file with mode: 0644]
test/test_download.py
test/test_playlists.py [deleted file]
test/test_utils.py
test/test_youtube_lists.py
youtube-dl [deleted file]
youtube-dl.exe [deleted file]
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/cache.py [new file with mode: 0644]
youtube_dl/downloader/hls.py
youtube_dl/downloader/http.py
youtube_dl/downloader/rtmp.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/academicearth.py
youtube_dl/extractor/adultswim.py
youtube_dl/extractor/anysex.py [new file with mode: 0644]
youtube_dl/extractor/aol.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/bambuser.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/beeg.py [new file with mode: 0644]
youtube_dl/extractor/br.py
youtube_dl/extractor/cbs.py
youtube_dl/extractor/chilloutzone.py
youtube_dl/extractor/cliphunter.py
youtube_dl/extractor/cloudy.py [new file with mode: 0644]
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/cspan.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/daum.py
youtube_dl/extractor/dbtv.py [new file with mode: 0644]
youtube_dl/extractor/deezer.py [new file with mode: 0644]
youtube_dl/extractor/dropbox.py
youtube_dl/extractor/drtuber.py [new file with mode: 0644]
youtube_dl/extractor/drtv.py
youtube_dl/extractor/eighttracks.py
youtube_dl/extractor/empflix.py
youtube_dl/extractor/eporner.py [new file with mode: 0644]
youtube_dl/extractor/everyonesmixtape.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/hornbunny.py [new file with mode: 0644]
youtube_dl/extractor/hostingbulk.py [new file with mode: 0644]
youtube_dl/extractor/ign.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/ivi.py
youtube_dl/extractor/izlesene.py
youtube_dl/extractor/khanacademy.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mlb.py
youtube_dl/extractor/moevideo.py [new file with mode: 0644]
youtube_dl/extractor/mofosex.py
youtube_dl/extractor/musicvault.py [new file with mode: 0644]
youtube_dl/extractor/nba.py
youtube_dl/extractor/nhl.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/nosvideo.py [new file with mode: 0644]
youtube_dl/extractor/npo.py
youtube_dl/extractor/pornhd.py
youtube_dl/extractor/pornoxo.py [new file with mode: 0644]
youtube_dl/extractor/promptfile.py [new file with mode: 0644]
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/rtlnl.py
youtube_dl/extractor/rutube.py
youtube_dl/extractor/rutv.py
youtube_dl/extractor/sharesix.py [new file with mode: 0644]
youtube_dl/extractor/smotri.py
youtube_dl/extractor/sockshare.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spiegel.py
youtube_dl/extractor/sportdeutschland.py [new file with mode: 0644]
youtube_dl/extractor/sunporno.py [new file with mode: 0644]
youtube_dl/extractor/swrmediathek.py
youtube_dl/extractor/teachertube.py
youtube_dl/extractor/techtalks.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/telemb.py [new file with mode: 0644]
youtube_dl/extractor/tnaflix.py [new file with mode: 0644]
youtube_dl/extractor/toypics.py
youtube_dl/extractor/tudou.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/turbo.py [new file with mode: 0644]
youtube_dl/extractor/tvigle.py
youtube_dl/extractor/tvplay.py
youtube_dl/extractor/unistra.py
youtube_dl/extractor/ustream.py
youtube_dl/extractor/veehd.py
youtube_dl/extractor/vgtv.py [new file with mode: 0644]
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vine.py
youtube_dl/extractor/vporn.py [new file with mode: 0644]
youtube_dl/extractor/washingtonpost.py
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/xtube.py
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/youjizz.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py
youtube_dl/options.py [new file with mode: 0644]
youtube_dl/utils.py
youtube_dl/version.py

index b8128fab17f0599c5aac3fd1313d8caf32cf535b..e44977ca36ed367c009fea0144f50d3d1893d082 100644 (file)
@@ -11,6 +11,7 @@ MANIFEST
 README.txt
 youtube-dl.1
 youtube-dl.bash-completion
+youtube-dl.fish
 youtube-dl
 youtube-dl.exe
 youtube-dl.tar.gz
diff --git a/LATEST_VERSION b/LATEST_VERSION
deleted file mode 100644 (file)
index a334573..0000000
+++ /dev/null
@@ -1 +0,0 @@
-2012.12.99
index d43cc1f3ba95e2ec16728320b5dd64b8a3558abb..5743f605a2ab4e93e76416732f6e42b252e87150 100644 (file)
@@ -2,5 +2,6 @@ include README.md
 include test/*.py
 include test/*.json
 include youtube-dl.bash-completion
+include youtube-dl.fish
 include youtube-dl.1
 recursive-include docs Makefile conf.py *.rst
index 088a9320bddfd367babd928bc96c71f3eaa4d9de..6272b826ce0bc86749948684c81f8436f29c7b9b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
-all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
+all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish
 
 clean:
-       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
+       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.fish
 
 cleanall: clean
        rm -f youtube-dl youtube-dl.exe
@@ -29,6 +29,8 @@ install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
        install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
        install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
        install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
+       install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
+       install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
 
 test:
        #nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
@@ -36,9 +38,9 @@ test:
 
 tar: youtube-dl.tar.gz
 
-.PHONY: all clean install test tar bash-completion pypi-files
+.PHONY: all clean install test tar bash-completion pypi-files fish-completion
 
-pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1
+pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
 
 youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
        zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
@@ -64,7 +66,12 @@ youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-co
 
 bash-completion: youtube-dl.bash-completion
 
-youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
+youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
+       python devscripts/fish-completion.py
+
+fish-completion: youtube-dl.fish
+
+youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish
        @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
                --exclude '*.DS_Store' \
                --exclude '*.kate-swp' \
@@ -78,5 +85,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
                -- \
                bin devscripts test youtube_dl docs \
                LICENSE README.md README.txt \
-               Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
+               Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
+               youtube-dl.fish setup.py \
                youtube-dl
index ca366039e4515f2095e15c260a4bd4fa65d22e87..5cc959ac54d33de813222558d6c44ce06a5e3391 100644 (file)
--- a/README.md
+++ b/README.md
@@ -345,6 +345,25 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
 
 # FAQ
 
+### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
+
+YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
+
+If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to report bugs to the Ubuntu packaging guys - all they have to do is update the package to a somewhat recent version.
+
+Alternatively, uninstall the youtube-dl package and follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html). In a pinch, this should do if you used `apt-get` before to install youtube-dl:
+
+```
+sudo apt-get remove -y youtube-dl
+sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
+sudo chmod a+x /usr/local/bin/youtube-dl
+hash -r
+```
+
+### Do I always have to pass in `--max-quality FORMAT`, or `-citw`?
+
+By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`.
+
 ### Can you please put the -b option back?
 
 Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it.
diff --git a/devscripts/fish-completion.in b/devscripts/fish-completion.in
new file mode 100644 (file)
index 0000000..eb79765
--- /dev/null
@@ -0,0 +1,5 @@
+
+{{commands}}
+
+
+complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
new file mode 100755 (executable)
index 0000000..f4aaf02
--- /dev/null
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import optparse
+import os
+from os.path import dirname as dirn
+import sys
+
+sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
+import youtube_dl
+from youtube_dl.utils import shell_quote
+
+FISH_COMPLETION_FILE = 'youtube-dl.fish'
+FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
+
+EXTRA_ARGS = {
+    'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'],
+
+    # Options that need a file parameter
+    'download-archive': ['--require-parameter'],
+    'cookies': ['--require-parameter'],
+    'load-info': ['--require-parameter'],
+    'batch-file': ['--require-parameter'],
+}
+
+def build_completion(opt_parser):
+    commands = []
+
+    for group in opt_parser.option_groups:
+        for option in group.option_list:
+            long_option = option.get_opt_string().strip('-')
+            help_msg = shell_quote([option.help])
+            complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
+            if option._short_opts:
+                complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
+            if option.help != optparse.SUPPRESS_HELP:
+                complete_cmd += ['--description', option.help]
+            complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
+            commands.append(shell_quote(complete_cmd))
+
+    with open(FISH_COMPLETION_TEMPLATE) as f:
+        template = f.read()
+    filled_template = template.replace('{{commands}}', '\n'.join(commands))
+    with open(FISH_COMPLETION_FILE, 'w') as f:
+        f.write(filled_template)
+
+parser = youtube_dl.parseOpts()[0]
+build_completion(parser)
index 453087e5f70fa92906926ef12ab3b192087c51c3..691517ceb9b34394115ed4e54521bad1d4f3b54b 100755 (executable)
@@ -73,7 +73,6 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 (cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
 (cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
 (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
-git checkout HEAD -- youtube-dl youtube-dl.exe
 
 /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
 for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
index 03e7b358e4ec1b4800e06f6796e386a808b67891..cf6b92b0f7e61b504dfdc16b6b04568fd073982b 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -48,6 +48,7 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
 else:
     files_spec = [
         ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
+        ('etc/fish/completions', ['youtube-dl.fish']),
         ('share/doc/youtube_dl', ['README.txt']),
         ('share/man/man1', ['youtube-dl.1'])
     ]
index 01b11f6612dae53ae08bf397db89182c92ae4666..7f3ab8438736485187f96464d6844080ac98c45f 100644 (file)
@@ -103,7 +103,8 @@ def expect_info_dict(self, expected_dict, got_dict):
 
             self.assertTrue(
                 isinstance(got, compat_str),
-                'Expected a %r object, but got %r' % (compat_str, type(got)))
+                u'Expected a %s object, but got %s for field %s' % (
+                    compat_str.__name__, type(got).__name__, info_field))
             self.assertTrue(
                 match_rex.match(got),
                 u'field %s (value: %r) should match %r' % (info_field, got, match_str))
index 487a46d56670c1ded91cd71ed35055e54232187b..098cd0cd0c4d17ded161fffbcdc327a7bbb30ee3 100644 (file)
@@ -27,7 +27,6 @@
     "rejecttitle": null, 
     "retries": 10, 
     "simulate": false, 
-    "skip_download": false, 
     "subtitleslang": null, 
     "subtitlesformat": "srt",
     "test": true, 
index b1ad30bf10ad08a19e14658a8299c006f54c03fa..84b05da39e1e28d0df4d65acb6248aa77d7b6b65 100644 (file)
@@ -109,7 +109,9 @@ class TestAllURLsMatching(unittest.TestCase):
                 if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
                     self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
                 else:
-                    self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
+                    self.assertFalse(
+                        ie.suitable(url),
+                        '%s should not match URL %r . That URL belongs to %s.' % (type(ie).__name__, url, tc['name']))
 
     def test_keywords(self):
         self.assertMatch(':ytsubs', ['youtube:subscriptions'])
@@ -141,32 +143,6 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
         self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
 
-    def test_ComedyCentralShows(self):
-        self.assertMatch(
-            'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
-            ['ComedyCentralShows'])
-        self.assertMatch(
-            'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
-            ['ComedyCentralShows'])
-
     def test_yahoo_https(self):
         # https://github.com/rg3/youtube-dl/issues/2701
         self.assertMatch(
diff --git a/test/test_cache.py b/test/test_cache.py
new file mode 100644 (file)
index 0000000..a161601
--- /dev/null
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import shutil
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import FakeYDL
+from youtube_dl.cache import Cache
+
+
+def _is_empty(d):
+    return not bool(os.listdir(d))
+
+
+def _mkdir(d):
+    if not os.path.exists(d):
+        os.mkdir(d)
+
+
+class TestCache(unittest.TestCase):
+    def setUp(self):
+        TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+        TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
+        _mkdir(TESTDATA_DIR)
+        self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test')
+        self.tearDown()
+
+    def tearDown(self):
+        if os.path.exists(self.test_dir):
+            shutil.rmtree(self.test_dir)
+
+    def test_cache(self):
+        ydl = FakeYDL({
+            'cachedir': self.test_dir,
+        })
+        c = Cache(ydl)
+        obj = {'x': 1, 'y': ['ä', '\\a', True]}
+        self.assertEqual(c.load('test_cache', 'k.'), None)
+        c.store('test_cache', 'k.', obj)
+        self.assertEqual(c.load('test_cache', 'k2'), None)
+        self.assertFalse(_is_empty(self.test_dir))
+        self.assertEqual(c.load('test_cache', 'k.'), obj)
+        self.assertEqual(c.load('test_cache', 'y'), None)
+        self.assertEqual(c.load('test_cache2', 'k.'), None)
+        c.remove()
+        self.assertFalse(os.path.exists(self.test_dir))
+        self.assertEqual(c.load('test_cache', 'k.'), None)
+
+
+if __name__ == '__main__':
+    unittest.main()
index 6422ef1197d226e8b7f098e7cb415ef58c187fed..2b8ac69754502457ea2e09aa026f3eceaee732f4 100644 (file)
@@ -28,6 +28,7 @@ from youtube_dl.utils import (
     compat_HTTPError,
     DownloadError,
     ExtractorError,
+    format_bytes,
     UnavailableVideoError,
 )
 from youtube_dl.extractor import get_info_extractor
@@ -103,8 +104,11 @@ def generator(test_case):
         def get_tc_filename(tc):
             return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
 
-        def try_rm_tcs_files():
-            for tc in test_cases:
+        res_dict = None
+        def try_rm_tcs_files(tcs=None):
+            if tcs is None:
+                tcs = test_cases
+            for tc in tcs:
                 tc_filename = get_tc_filename(tc)
                 try_rm(tc_filename)
                 try_rm(tc_filename + '.part')
@@ -148,24 +152,47 @@ def generator(test_case):
                 self.assertEqual(
                     len(res_dict['entries']),
                     test_case['playlist_count'],
-                    'Expected at %d in playlist %s, but got %d.')
+                    'Expected %d entries in playlist %s, but got %d.' % (
+                        test_case['playlist_count'],
+                        test_case['url'],
+                        len(res_dict['entries']),
+                    ))
+            if 'playlist_duration_sum' in test_case:
+                got_duration = sum(e['duration'] for e in res_dict['entries'])
+                self.assertEqual(
+                    test_case['playlist_duration_sum'], got_duration)
 
             for tc in test_cases:
                 tc_filename = get_tc_filename(tc)
                 if not test_case.get('params', {}).get('skip_download', False):
                     self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
                     self.assertTrue(tc_filename in finished_hook_called)
+                    expected_minsize = tc.get('file_minsize', 10000)
+                    if expected_minsize is not None:
+                        if params.get('test'):
+                            expected_minsize = max(expected_minsize, 10000)
+                        got_fsize = os.path.getsize(tc_filename)
+                        assertGreaterEqual(
+                            self, got_fsize, expected_minsize,
+                            'Expected %s to be at least %s, but it\'s only %s ' %
+                            (tc_filename, format_bytes(expected_minsize),
+                                format_bytes(got_fsize)))
+                    if 'md5' in tc:
+                        md5_for_file = _file_md5(tc_filename)
+                        self.assertEqual(md5_for_file, tc['md5'])
                 info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
                 self.assertTrue(os.path.exists(info_json_fn))
-                if 'md5' in tc:
-                    md5_for_file = _file_md5(tc_filename)
-                    self.assertEqual(md5_for_file, tc['md5'])
                 with io.open(info_json_fn, encoding='utf-8') as infof:
                     info_dict = json.load(infof)
 
                 expect_info_dict(self, tc.get('info_dict', {}), info_dict)
         finally:
             try_rm_tcs_files()
+            if is_playlist and res_dict is not None:
+                # Remove all other files that may have been extracted if the
+                # extractor returns full results even with extract_flat
+                res_tcs = [{'info_dict': e} for e in res_dict['entries']]
+                try_rm_tcs_files(res_tcs)
 
     return test_template
 
diff --git a/test/test_playlists.py b/test/test_playlists.py
deleted file mode 100644 (file)
index 0137b83..0000000
+++ /dev/null
@@ -1,395 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-## DEPRECATED FILE!
-# Add new tests to the extractors themselves, like this:
-# _TEST = {
-#    'url': 'http://example.com/playlist/42',
-#    'playlist_mincount': 99,
-#    'info_dict': {
-#        'id': '42',
-#        'title': 'Playlist number forty-two',
-#    }
-# }
-
-from __future__ import unicode_literals
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from test.helper import (
-    assertRegexpMatches,
-    assertGreaterEqual,
-    expect_info_dict,
-    FakeYDL,
-)
-
-from youtube_dl.extractor import (
-    AcademicEarthCourseIE,
-    DailymotionPlaylistIE,
-    DailymotionUserIE,
-    VimeoChannelIE,
-    VimeoUserIE,
-    VimeoAlbumIE,
-    VimeoGroupsIE,
-    VineUserIE,
-    UstreamChannelIE,
-    SoundcloudSetIE,
-    SoundcloudUserIE,
-    SoundcloudPlaylistIE,
-    TeacherTubeUserIE,
-    LivestreamIE,
-    LivestreamOriginalIE,
-    NHLVideocenterIE,
-    BambuserChannelIE,
-    BandcampAlbumIE,
-    SmotriCommunityIE,
-    SmotriUserIE,
-    IviCompilationIE,
-    ImdbListIE,
-    KhanAcademyIE,
-    EveryonesMixtapeIE,
-    RutubeChannelIE,
-    RutubePersonIE,
-    GoogleSearchIE,
-    GenericIE,
-    TEDIE,
-    ToypicsUserIE,
-    XTubeUserIE,
-    InstagramUserIE,
-    CSpanIE,
-    AolIE,
-    GameOnePlaylistIE,
-)
-
-
-class TestPlaylists(unittest.TestCase):
-    def assertIsPlaylist(self, info):
-        """Make sure the info has '_type' set to 'playlist'"""
-        self.assertEqual(info['_type'], 'playlist')
-
-    def test_dailymotion_playlist(self):
-        dl = FakeYDL()
-        ie = DailymotionPlaylistIE(dl)
-        result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'SPORT')
-        self.assertTrue(len(result['entries']) > 20)
-
-    def test_dailymotion_user(self):
-        dl = FakeYDL()
-        ie = DailymotionUserIE(dl)
-        result = ie.extract('https://www.dailymotion.com/user/nqtv')
-        self.assertIsPlaylist(result)
-        assertGreaterEqual(self, len(result['entries']), 100)
-        self.assertEqual(result['title'], 'Rémi Gaillard')
-
-    def test_vimeo_channel(self):
-        dl = FakeYDL()
-        ie = VimeoChannelIE(dl)
-        result = ie.extract('http://vimeo.com/channels/tributes')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Vimeo Tributes')
-        self.assertTrue(len(result['entries']) > 24)
-
-    def test_vimeo_user(self):
-        dl = FakeYDL()
-        ie = VimeoUserIE(dl)
-        result = ie.extract('http://vimeo.com/nkistudio/videos')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Nki')
-        self.assertTrue(len(result['entries']) > 65)
-
-    def test_vimeo_album(self):
-        dl = FakeYDL()
-        ie = VimeoAlbumIE(dl)
-        result = ie.extract('http://vimeo.com/album/2632481')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Staff Favorites: November 2013')
-        self.assertTrue(len(result['entries']) > 12)
-
-    def test_vimeo_groups(self):
-        dl = FakeYDL()
-        ie = VimeoGroupsIE(dl)
-        result = ie.extract('http://vimeo.com/groups/rolexawards')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Rolex Awards for Enterprise')
-        self.assertTrue(len(result['entries']) > 72)
-
-    def test_vine_user(self):
-        dl = FakeYDL()
-        ie = VineUserIE(dl)
-        result = ie.extract('https://vine.co/Visa')
-        self.assertIsPlaylist(result)
-        assertGreaterEqual(self, len(result['entries']), 47)
-
-    def test_ustream_channel(self):
-        dl = FakeYDL()
-        ie = UstreamChannelIE(dl)
-        result = ie.extract('http://www.ustream.tv/channel/channeljapan')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '10874166')
-        assertGreaterEqual(self, len(result['entries']), 54)
-
-    def test_soundcloud_set(self):
-        dl = FakeYDL()
-        ie = SoundcloudSetIE(dl)
-        result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'The Royal Concept EP')
-        assertGreaterEqual(self, len(result['entries']), 6)
-
-    def test_soundcloud_user(self):
-        dl = FakeYDL()
-        ie = SoundcloudUserIE(dl)
-        result = ie.extract('https://soundcloud.com/the-concept-band')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '9615865')
-        assertGreaterEqual(self, len(result['entries']), 12)
-
-    def test_soundcloud_likes(self):
-        dl = FakeYDL()
-        ie = SoundcloudUserIE(dl)
-        result = ie.extract('https://soundcloud.com/the-concept-band/likes')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '9615865')
-        assertGreaterEqual(self, len(result['entries']), 1)
-
-    def test_soundcloud_playlist(self):
-        dl = FakeYDL()
-        ie = SoundcloudPlaylistIE(dl)
-        result = ie.extract('http://api.soundcloud.com/playlists/4110309')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '4110309')
-        self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]')
-        assertRegexpMatches(
-            self, result['description'], r'.*?TILT Brass - Bowery Poetry Club')
-        self.assertEqual(len(result['entries']), 6)
-
-    def test_livestream_event(self):
-        dl = FakeYDL()
-        ie = LivestreamIE(dl)
-        result = ie.extract('http://new.livestream.com/tedx/cityenglish')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'TEDCity2.0 (English)')
-        assertGreaterEqual(self, len(result['entries']), 4)
-
-    def test_livestreamoriginal_folder(self):
-        dl = FakeYDL()
-        ie = LivestreamOriginalIE(dl)
-        result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
-        assertGreaterEqual(self, len(result['entries']), 28)
-
-    def test_nhl_videocenter(self):
-        dl = FakeYDL()
-        ie = NHLVideocenterIE(dl)
-        result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '999')
-        self.assertEqual(result['title'], 'Highlights')
-        self.assertEqual(len(result['entries']), 12)
-
-    def test_bambuser_channel(self):
-        dl = FakeYDL()
-        ie = BambuserChannelIE(dl)
-        result = ie.extract('http://bambuser.com/channel/pixelversity')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'pixelversity')
-        assertGreaterEqual(self, len(result['entries']), 60)
-
-    def test_bandcamp_album(self):
-        dl = FakeYDL()
-        ie = BandcampAlbumIE(dl)
-        result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Hierophany of the Open Grave')
-        assertGreaterEqual(self, len(result['entries']), 9)
-        
-    def test_smotri_community(self):
-        dl = FakeYDL()
-        ie = SmotriCommunityIE(dl)
-        result = ie.extract('http://smotri.com/community/video/kommuna')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'kommuna')
-        self.assertEqual(result['title'], 'КПРФ')
-        assertGreaterEqual(self, len(result['entries']), 4)
-        
-    def test_smotri_user(self):
-        dl = FakeYDL()
-        ie = SmotriUserIE(dl)
-        result = ie.extract('http://smotri.com/user/inspector')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'inspector')
-        self.assertEqual(result['title'], 'Inspector')
-        assertGreaterEqual(self, len(result['entries']), 9)
-
-    def test_AcademicEarthCourse(self):
-        dl = FakeYDL()
-        ie = AcademicEarthCourseIE(dl)
-        result = ie.extract('http://academicearth.org/playlists/laws-of-nature/')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'laws-of-nature')
-        self.assertEqual(result['title'], 'Laws of Nature')
-        self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
-        self.assertEqual(len(result['entries']), 4)
-        
-    def test_ivi_compilation(self):
-        dl = FakeYDL()
-        ie = IviCompilationIE(dl)
-        result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'dvoe_iz_lartsa')
-        self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
-        assertGreaterEqual(self, len(result['entries']), 24)
-
-    def test_ivi_compilation_season(self):
-        dl = FakeYDL()
-        ie = IviCompilationIE(dl)
-        result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
-        self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
-        assertGreaterEqual(self, len(result['entries']), 12)
-        
-    def test_imdb_list(self):
-        dl = FakeYDL()
-        ie = ImdbListIE(dl)
-        result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'JFs9NWw6XI0')
-        self.assertEqual(result['title'], 'March 23, 2012 Releases')
-        self.assertEqual(len(result['entries']), 7)
-
-    def test_khanacademy_topic(self):
-        dl = FakeYDL()
-        ie = KhanAcademyIE(dl)
-        result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'cryptography')
-        self.assertEqual(result['title'], 'Journey into cryptography')
-        self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
-        assertGreaterEqual(self, len(result['entries']), 3)
-
-    def test_EveryonesMixtape(self):
-        dl = FakeYDL()
-        ie = EveryonesMixtapeIE(dl)
-        result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'm7m0jJAbMQi')
-        self.assertEqual(result['title'], 'Driving')
-        self.assertEqual(len(result['entries']), 24)
-        
-    def test_rutube_channel(self):
-        dl = FakeYDL()
-        ie = RutubeChannelIE(dl)
-        result = ie.extract('http://rutube.ru/tags/video/1800/')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '1800')
-        assertGreaterEqual(self, len(result['entries']), 68)
-
-    def test_rutube_person(self):
-        dl = FakeYDL()
-        ie = RutubePersonIE(dl)
-        result = ie.extract('http://rutube.ru/video/person/313878/')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '313878')
-        assertGreaterEqual(self, len(result['entries']), 37)
-
-    def test_multiple_brightcove_videos(self):
-        # https://github.com/rg3/youtube-dl/issues/2283
-        dl = FakeYDL()
-        ie = GenericIE(dl)
-        result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
-        self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
-        self.assertEqual(len(result['entries']), 3)
-
-    def test_ted_playlist(self):
-        dl = FakeYDL()
-        ie = TEDIE(dl)
-        result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '10')
-        self.assertEqual(result['title'], 'Who are the hackers?')
-        assertGreaterEqual(self, len(result['entries']), 6)
-
-    def test_toypics_user(self):
-        dl = FakeYDL()
-        ie = ToypicsUserIE(dl)
-        result = ie.extract('http://videos.toypics.net/Mikey')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'Mikey')
-        assertGreaterEqual(self, len(result['entries']), 17)
-
-    def test_xtube_user(self):
-        dl = FakeYDL()
-        ie = XTubeUserIE(dl)
-        result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'greenshowers')
-        assertGreaterEqual(self, len(result['entries']), 155)
-
-    def test_InstagramUser(self):
-        dl = FakeYDL()
-        ie = InstagramUserIE(dl)
-        result = ie.extract('http://instagram.com/porsche')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'porsche')
-        assertGreaterEqual(self, len(result['entries']), 2)
-        test_video = next(
-            e for e in result['entries']
-            if e['id'] == '614605558512799803_462752227')
-        dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
-        dl.process_video_result(test_video, download=False)
-        EXPECTED = {
-            'id': '614605558512799803_462752227',
-            'ext': 'mp4',
-            'title': '#Porsche Intelligent Performance.',
-            'thumbnail': 're:^https?://.*\.jpg',
-            'uploader': 'Porsche',
-            'uploader_id': 'porsche',
-            'timestamp': 1387486713,
-            'upload_date': '20131219',
-        }
-        expect_info_dict(self, EXPECTED, test_video)
-
-    def test_CSpan_playlist(self):
-        dl = FakeYDL()
-        ie = CSpanIE(dl)
-        result = ie.extract(
-            'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '342759')
-        self.assertEqual(
-            result['title'], 'General Motors Ignition Switch Recall')
-        whole_duration = sum(e['duration'] for e in result['entries'])
-        self.assertEqual(whole_duration, 14855)
-
-    def test_aol_playlist(self):
-        dl = FakeYDL()
-        ie = AolIE(dl)
-        result = ie.extract(
-            'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '152147')
-        self.assertEqual(
-            result['title'], 'Brace Yourself - Today\'s Weirdest News')
-        assertGreaterEqual(self, len(result['entries']), 10)
-
-    def test_TeacherTubeUser(self):
-        dl = FakeYDL()
-        ie = TeacherTubeUserIE(dl)
-        result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'rbhagwati2')
-        assertGreaterEqual(self, len(result['entries']), 179)
-
-
-if __name__ == '__main__':
-    unittest.main()
index 0953db3719b004de0751427e9b83962fd2fc8f84..3efbed29dd34de570f2db4e6eb4954ec2f4b9c6e 100644 (file)
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 # coding: utf-8
 
+from __future__ import unicode_literals
+
 # Allow direct execution
 import os
 import sys
@@ -13,7 +15,6 @@ import io
 import json
 import xml.etree.ElementTree
 
-#from youtube_dl.utils import htmlentity_transform
 from youtube_dl.utils import (
     DateRange,
     encodeFilename,
@@ -39,13 +40,11 @@ from youtube_dl.utils import (
     parse_iso8601,
     strip_jsonp,
     uppercase_escape,
+    limit_length,
+    escape_rfc3986,
+    escape_url,
 )
 
-if sys.version_info < (3, 0):
-    _compat_str = lambda b: b.decode('unicode-escape')
-else:
-    _compat_str = lambda s: s
-
 
 class TestUtil(unittest.TestCase):
     def test_timeconvert(self):
@@ -67,9 +66,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual('this - that', sanitize_filename('this: that'))
 
         self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
-        aumlaut = _compat_str('\xe4')
+        aumlaut = 'ä'
         self.assertEqual(sanitize_filename(aumlaut), aumlaut)
-        tests = _compat_str('\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430')
+        tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'
         self.assertEqual(sanitize_filename(tests), tests)
 
         forbidden = '"\0\\/'
@@ -91,9 +90,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
         self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
 
-        tests = _compat_str('a\xe4b\u4e2d\u56fd\u7684c')
+        tests = 'a\xe4b\u4e2d\u56fd\u7684c'
         self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
-        self.assertTrue(sanitize_filename(_compat_str('\xf6'), restricted=True) != '')  # No empty filename
+        self.assertTrue(sanitize_filename('\xf6', restricted=True) != '')  # No empty filename
 
         forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
         for fc in forbidden:
@@ -101,8 +100,8 @@ class TestUtil(unittest.TestCase):
                 self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
 
         # Handle a common case more neatly
-        self.assertEqual(sanitize_filename(_compat_str('\u5927\u58f0\u5e26 - Song'), restricted=True), 'Song')
-        self.assertEqual(sanitize_filename(_compat_str('\u603b\u7edf: Speech'), restricted=True), 'Speech')
+        self.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted=True), 'Song')
+        self.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted=True), 'Speech')
         # .. but make sure the file name is never empty
         self.assertTrue(sanitize_filename('-', restricted=True) != '')
         self.assertTrue(sanitize_filename(':', restricted=True) != '')
@@ -120,7 +119,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])
 
     def test_unescape_html(self):
-        self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;'))
+        self.assertEqual(unescapeHTML('%20;'), '%20;')
+        self.assertEqual(
+            unescapeHTML('&eacute;'), 'é')
         
     def test_daterange(self):
         _20century = DateRange("19000101","20000101")
@@ -138,7 +139,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_strdate('1968-12-10'), '19681210')
 
     def test_find_xpath_attr(self):
-        testxml = u'''<root>
+        testxml = '''<root>
             <node/>
             <node x="a"/>
             <node x="a" y="c" />
@@ -151,18 +152,18 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
 
     def test_meta_parser(self):
-        testhtml = u'''
+        testhtml = '''
         <head>
             <meta name="description" content="foo &amp; bar">
             <meta content='Plato' name='author'/>
         </head>
         '''
         get_meta = lambda name: get_meta_content(name, testhtml)
-        self.assertEqual(get_meta('description'), u'foo & bar')
+        self.assertEqual(get_meta('description'), 'foo & bar')
         self.assertEqual(get_meta('author'), 'Plato')
 
     def test_xpath_with_ns(self):
-        testxml = u'''<root xmlns:media="http://example.com/">
+        testxml = '''<root xmlns:media="http://example.com/">
             <media:song>
                 <media:author>The Author</media:author>
                 <url>http://server.com/download.mp3</url>
@@ -171,8 +172,8 @@ class TestUtil(unittest.TestCase):
         doc = xml.etree.ElementTree.fromstring(testxml)
         find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
         self.assertTrue(find('media:song') is not None)
-        self.assertEqual(find('media:song/media:author').text, u'The Author')
-        self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3')
+        self.assertEqual(find('media:song/media:author').text, 'The Author')
+        self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
 
     def test_smuggle_url(self):
         data = {u"ö": u"ö", u"abc": [3]}
@@ -187,22 +188,22 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(res_data, None)
 
     def test_shell_quote(self):
-        args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
-        self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
+        args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
+        self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
 
     def test_str_to_int(self):
         self.assertEqual(str_to_int('123,456'), 123456)
         self.assertEqual(str_to_int('123.456'), 123456)
 
     def test_url_basename(self):
-        self.assertEqual(url_basename(u'http://foo.de/'), u'')
-        self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz')
-        self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz')
-        self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz')
-        self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')
+        self.assertEqual(url_basename('http://foo.de/'), '')
+        self.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz')
+        self.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz')
+        self.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz')
+        self.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz')
         self.assertEqual(
-            url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),
-            u'trailer.mp4')
+            url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
+            'trailer.mp4')
 
     def test_parse_duration(self):
         self.assertEqual(parse_duration(None), None)
@@ -213,6 +214,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('00:01:01'), 61)
         self.assertEqual(parse_duration('x:y'), None)
         self.assertEqual(parse_duration('3h11m53s'), 11513)
+        self.assertEqual(parse_duration('3h 11m 53s'), 11513)
+        self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
+        self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
         self.assertEqual(parse_duration('62m45s'), 3765)
         self.assertEqual(parse_duration('6m59s'), 419)
         self.assertEqual(parse_duration('49s'), 49)
@@ -256,16 +260,16 @@ class TestUtil(unittest.TestCase):
         testPL(5, 2, (20, 99), [])
 
     def test_struct_unpack(self):
-        self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
+        self.assertEqual(struct_unpack('!B', b'\x00'), (0,))
 
     def test_read_batch_urls(self):
-        f = io.StringIO(u'''\xef\xbb\xbf foo
+        f = io.StringIO('''\xef\xbb\xbf foo
             bar\r
             baz
             # More after this line\r
             ; or after this
             bam''')
-        self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
+        self.assertEqual(read_batch_urls(f), ['foo', 'bar', 'baz', 'bam'])
 
     def test_urlencode_postdata(self):
         data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
@@ -282,8 +286,44 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(d, [{"id": "532cb", "x": 3}])
 
     def test_uppercase_escape(self):
-        self.assertEqual(uppercase_escape(u'aä'), u'aä')
-        self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
+        self.assertEqual(uppercase_escape('aä'), 'aä')
+        self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
+
+    def test_limit_length(self):
+        self.assertEqual(limit_length(None, 12), None)
+        self.assertEqual(limit_length('foo', 12), 'foo')
+        self.assertTrue(
+            limit_length('foo bar baz asd', 12).startswith('foo bar'))
+        self.assertTrue('...' in limit_length('foo bar baz asd', 12))
+
+    def test_escape_rfc3986(self):
+        reserved = "!*'();:@&=+$,/?#[]"
+        unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
+        self.assertEqual(escape_rfc3986(reserved), reserved)
+        self.assertEqual(escape_rfc3986(unreserved), unreserved)
+        self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82')
+        self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82')
+        self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
+        self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')
+
+    def test_escape_url(self):
+        self.assertEqual(
+            escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
+            'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
+        )
+        self.assertEqual(
+            escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
+            'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
+        )
+        self.assertEqual(
+            escape_url('http://тест.рф/фрагмент'),
+            'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
+        )
+        self.assertEqual(
+            escape_url('http://тест.рф/абв?абв=абв#абв'),
+            'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
+        )
+        self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
 
 if __name__ == '__main__':
     unittest.main()
index 3aadedd64cf5af38ab1d18b640b10301c2073de2..1fa99f88b595644df5c4ed50e4c134cc15668638 100644 (file)
@@ -25,15 +25,6 @@ class TestYoutubeLists(unittest.TestCase):
         """Make sure the info has '_type' set to 'playlist'"""
         self.assertEqual(info['_type'], 'playlist')
 
-    def test_youtube_playlist(self):
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'ytdl test PL')
-        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
-        self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
-
     def test_youtube_playlist_noplaylist(self):
         dl = FakeYDL()
         dl.params['noplaylist'] = True
@@ -41,36 +32,7 @@ class TestYoutubeLists(unittest.TestCase):
         result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         self.assertEqual(result['_type'], 'url')
         self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
-
-    def test_issue_673(self):
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLBB231211A4F62143')
-        self.assertTrue(len(result['entries']) > 25)
-
-    def test_youtube_playlist_long(self):
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
-        self.assertIsPlaylist(result)
-        self.assertTrue(len(result['entries']) >= 799)
-
-    def test_youtube_playlist_with_deleted(self):
-        #651
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
-        ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
-        self.assertFalse('pElCt5oNDuI' in ytie_results)
-        self.assertFalse('KdPEApIVdWM' in ytie_results)
-        
-    def test_youtube_playlist_empty(self):
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
-        self.assertIsPlaylist(result)
-        self.assertEqual(len(result['entries']), 0)
-
+    
     def test_youtube_course(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
@@ -97,12 +59,6 @@ class TestYoutubeLists(unittest.TestCase):
         result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
         self.assertTrue(len(result['entries']) >= 320)
 
-    def test_youtube_safe_search(self):
-        dl = FakeYDL()
-        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
-        self.assertEqual(len(result['entries']), 2)
-
     def test_youtube_show(self):
         dl = FakeYDL()
         ie = YoutubeShowIE(dl)
diff --git a/youtube-dl b/youtube-dl
deleted file mode 100755 (executable)
index e3eb877..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/env python
-
-import sys, os
-import json, hashlib
-
-try:
-    import urllib.request as compat_urllib_request
-except ImportError: # Python 2
-    import urllib2 as compat_urllib_request
-
-def rsa_verify(message, signature, key):
-    from struct import pack
-    from hashlib import sha256
-    from sys import version_info
-    def b(x):
-        if version_info[0] == 2: return x
-        else: return x.encode('latin1')
-    assert(type(message) == type(b('')))
-    block_size = 0
-    n = key[0]
-    while n:
-        block_size += 1
-        n >>= 8
-    signature = pow(int(signature, 16), key[1], key[0])
-    raw_bytes = []
-    while signature:
-        raw_bytes.insert(0, pack("B", signature & 0xFF))
-        signature >>= 8
-    signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
-    if signature[0:2] != b('\x00\x01'): return False
-    signature = signature[2:]
-    if not b('\x00') in signature: return False
-    signature = signature[signature.index(b('\x00'))+1:]
-    if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
-    signature = signature[19:]
-    if signature != sha256(message).digest(): return False
-    return True
-
-sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
-sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
-sys.stderr.write(u'From now on, get the binaries from http://rg3.github.io/youtube-dl/download.html, not from the git repository.\n\n')
-
-try:
-       raw_input()
-except NameError: # Python 3
-       input()
-
-filename = sys.argv[0]
-
-UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
-VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
-JSON_URL = UPDATE_URL + 'versions.json'
-UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
-
-if not os.access(filename, os.W_OK):
-    sys.exit('ERROR: no write permissions on %s' % filename)
-
-try:
-    versions_info = compat_urllib_request.urlopen(JSON_URL).read().decode('utf-8')
-    versions_info = json.loads(versions_info)
-except:
-    sys.exit(u'ERROR: can\'t obtain versions info. Please try again later.')
-if not 'signature' in versions_info:
-    sys.exit(u'ERROR: the versions file is not signed or corrupted. Aborting.')
-signature = versions_info['signature']
-del versions_info['signature']
-if not rsa_verify(json.dumps(versions_info, sort_keys=True).encode('utf-8'), signature, UPDATES_RSA_KEY):
-    sys.exit(u'ERROR: the versions file signature is invalid. Aborting.')
-
-version = versions_info['versions'][versions_info['latest']]
-
-try:
-    urlh = compat_urllib_request.urlopen(version['bin'][0])
-    newcontent = urlh.read()
-    urlh.close()
-except (IOError, OSError) as err:
-    sys.exit('ERROR: unable to download latest version')
-
-newcontent_hash = hashlib.sha256(newcontent).hexdigest()
-if newcontent_hash != version['bin'][1]:
-    sys.exit(u'ERROR: the downloaded file hash does not match. Aborting.')
-
-try:
-    with open(filename, 'wb') as outf:
-        outf.write(newcontent)
-except (IOError, OSError) as err:
-    sys.exit('ERROR: unable to overwrite current version')
-
-sys.stderr.write(u'Done! Now you can run youtube-dl.\n')
diff --git a/youtube-dl.exe b/youtube-dl.exe
deleted file mode 100644 (file)
index 45eee04..0000000
Binary files a/youtube-dl.exe and /dev/null differ
index 98639e004c4502bef367eb23b6180269f29e54f7..9519594c9ad2dab36e1d66fe2f016d7edc6e798d 100755 (executable)
@@ -28,6 +28,7 @@ from .utils import (
     compat_str,
     compat_urllib_error,
     compat_urllib_request,
+    escape_url,
     ContentTooShortError,
     date_from_str,
     DateRange,
@@ -57,6 +58,7 @@ from .utils import (
     YoutubeDLHandler,
     prepend_extension,
 )
+from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
 from .downloader import get_suitable_downloader
 from .postprocessor import FFmpegMergerPP
@@ -133,7 +135,7 @@ class YoutubeDL(object):
     daterange:         A DateRange object, download only if the upload_date is in the range.
     skip_download:     Skip the actual download of the video file
     cachedir:          Location of the cache files in the filesystem.
-                       None to disable filesystem cache.
+                       False to disable filesystem cache.
     noplaylist:        Download single video instead of a playlist if in doubt.
     age_limit:         An integer representing the user's age in years.
                        Unsuitable videos for the given age are skipped.
@@ -195,6 +197,7 @@ class YoutubeDL(object):
         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
         self._err_file = sys.stderr
         self.params = params
+        self.cache = Cache(self)
 
         if params.get('bidi_workaround', False):
             try:
@@ -1239,6 +1242,25 @@ class YoutubeDL(object):
 
     def urlopen(self, req):
         """ Start an HTTP download """
+
+        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
+        # always respected by websites, some tend to give out URLs with non percent-encoded
+        # non-ASCII characters (see telemb.py, ard.py [#3412])
+        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
+        # To work around aforementioned issue we will replace request's original URL with
+        # percent-encoded one
+        url = req if isinstance(req, compat_str) else req.get_full_url()
+        url_escaped = escape_url(url)
+
+        # Substitute URL if any change after escaping
+        if url != url_escaped:
+            if isinstance(req, compat_str):
+                req = url_escaped
+            else:
+                req = compat_urllib_request.Request(
+                    url_escaped, data=req.data, headers=req.headers,
+                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):
index b1569505369a3669f46040d29ea1cd0277130460..42d0a018014045f4a288c8c48669edd669e7bca9 100644 (file)
@@ -74,29 +74,28 @@ __authors__  = (
     'Keith Beckman',
     'Ole Ernst',
     'Aaron McDaniel (mcd1992)',
+    'Magnus Kolstad',
 )
 
 __license__ = 'Public Domain'
 
 import codecs
 import io
-import optparse
 import os
 import random
-import shlex
-import shutil
 import sys
 
 
+from .options import (
+    parseOpts,
+)
 from .utils import (
     compat_getpass,
     compat_print,
     DateRange,
     DEFAULT_OUTTMPL,
     decodeOption,
-    get_term_width,
     DownloadError,
-    get_cachedir,
     MaxDownloadsReached,
     preferredencoding,
     read_batch_urls,
@@ -110,7 +109,6 @@ from .downloader import (
     FileDownloader,
 )
 from .extractor import gen_extractors
-from .version import __version__
 from .YoutubeDL import YoutubeDL
 from .postprocessor import (
     AtomicParsleyPP,
@@ -124,475 +122,6 @@ from .postprocessor import (
 )
 
 
-def parseOpts(overrideArguments=None):
-    def _readOptions(filename_bytes, default=[]):
-        try:
-            optionf = open(filename_bytes)
-        except IOError:
-            return default  # silently skip if file is not present
-        try:
-            res = []
-            for l in optionf:
-                res += shlex.split(l, comments=True)
-        finally:
-            optionf.close()
-        return res
-
-    def _readUserConf():
-        xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
-        if xdg_config_home:
-            userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
-            if not os.path.isfile(userConfFile):
-                userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
-        else:
-            userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
-            if not os.path.isfile(userConfFile):
-                userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
-        userConf = _readOptions(userConfFile, None)
-
-        if userConf is None:
-            appdata_dir = os.environ.get('appdata')
-            if appdata_dir:
-                userConf = _readOptions(
-                    os.path.join(appdata_dir, 'youtube-dl', 'config'),
-                    default=None)
-                if userConf is None:
-                    userConf = _readOptions(
-                        os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
-                        default=None)
-
-        if userConf is None:
-            userConf = _readOptions(
-                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
-                default=None)
-        if userConf is None:
-            userConf = _readOptions(
-                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
-                default=None)
-
-        if userConf is None:
-            userConf = []
-
-        return userConf
-
-    def _format_option_string(option):
-        ''' ('-o', '--option') -> -o, --format METAVAR'''
-
-        opts = []
-
-        if option._short_opts:
-            opts.append(option._short_opts[0])
-        if option._long_opts:
-            opts.append(option._long_opts[0])
-        if len(opts) > 1:
-            opts.insert(1, ', ')
-
-        if option.takes_value(): opts.append(' %s' % option.metavar)
-
-        return "".join(opts)
-
-    def _comma_separated_values_options_callback(option, opt_str, value, parser):
-        setattr(parser.values, option.dest, value.split(','))
-
-    def _hide_login_info(opts):
-        opts = list(opts)
-        for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
-            try:
-                i = opts.index(private_opt)
-                opts[i+1] = '<PRIVATE>'
-            except ValueError:
-                pass
-        return opts
-
-    max_width = 80
-    max_help_position = 80
-
-    # No need to wrap help messages if we're on a wide console
-    columns = get_term_width()
-    if columns: max_width = columns
-
-    fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
-    fmt.format_option_strings = _format_option_string
-
-    kw = {
-        'version'   : __version__,
-        'formatter' : fmt,
-        'usage' : '%prog [options] url [url...]',
-        'conflict_handler' : 'resolve',
-    }
-
-    parser = optparse.OptionParser(**kw)
-
-    # option groups
-    general        = optparse.OptionGroup(parser, 'General Options')
-    selection      = optparse.OptionGroup(parser, 'Video Selection')
-    authentication = optparse.OptionGroup(parser, 'Authentication Options')
-    video_format   = optparse.OptionGroup(parser, 'Video Format Options')
-    subtitles      = optparse.OptionGroup(parser, 'Subtitle Options')
-    downloader     = optparse.OptionGroup(parser, 'Download Options')
-    postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
-    filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
-    workarounds    = optparse.OptionGroup(parser, 'Workarounds')
-    verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
-
-    general.add_option('-h', '--help',
-            action='help', help='print this help text and exit')
-    general.add_option('-v', '--version',
-            action='version', help='print program version and exit')
-    general.add_option('-U', '--update',
-            action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
-    general.add_option('-i', '--ignore-errors',
-            action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
-    general.add_option('--abort-on-error',
-            action='store_false', dest='ignoreerrors',
-            help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
-    general.add_option('--dump-user-agent',
-            action='store_true', dest='dump_user_agent',
-            help='display the current browser identification', default=False)
-    general.add_option('--list-extractors',
-            action='store_true', dest='list_extractors',
-            help='List all supported extractors and the URLs they would handle', default=False)
-    general.add_option('--extractor-descriptions',
-            action='store_true', dest='list_extractor_descriptions',
-            help='Output descriptions of all supported extractors', default=False)
-    general.add_option(
-        '--proxy', dest='proxy', default=None, metavar='URL',
-        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
-    general.add_option(
-        '--socket-timeout', dest='socket_timeout',
-        type=float, default=None, help=u'Time to wait before giving up, in seconds')
-    general.add_option(
-        '--default-search',
-        dest='default_search', metavar='PREFIX',
-        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
-    general.add_option(
-        '--ignore-config',
-        action='store_true',
-        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
-
-    selection.add_option(
-        '--playlist-start',
-        dest='playliststart', metavar='NUMBER', default=1, type=int,
-        help='playlist video to start at (default is %default)')
-    selection.add_option(
-        '--playlist-end',
-        dest='playlistend', metavar='NUMBER', default=None, type=int,
-        help='playlist video to end at (default is last)')
-    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
-    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
-    selection.add_option('--max-downloads', metavar='NUMBER',
-                         dest='max_downloads', type=int, default=None,
-                         help='Abort after downloading NUMBER files')
-    selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
-    selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
-    selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
-    selection.add_option(
-        '--datebefore', metavar='DATE', dest='datebefore', default=None,
-        help='download only videos uploaded on or before this date (i.e. inclusive)')
-    selection.add_option(
-        '--dateafter', metavar='DATE', dest='dateafter', default=None,
-        help='download only videos uploaded on or after this date (i.e. inclusive)')
-    selection.add_option(
-        '--min-views', metavar='COUNT', dest='min_views',
-        default=None, type=int,
-        help="Do not download any videos with less than COUNT views",)
-    selection.add_option(
-        '--max-views', metavar='COUNT', dest='max_views',
-        default=None, type=int,
-        help="Do not download any videos with more than COUNT views",)
-    selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
-    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
-                         help='download only videos suitable for the given age',
-                         default=None, type=int)
-    selection.add_option('--download-archive', metavar='FILE',
-                         dest='download_archive',
-                         help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
-    selection.add_option(
-        '--include-ads', dest='include_ads',
-        action='store_true',
-        help='Download advertisements as well (experimental)')
-    selection.add_option(
-        '--youtube-include-dash-manifest', action='store_true',
-        dest='youtube_include_dash_manifest', default=False,
-        help='Try to download the DASH manifest on YouTube videos (experimental)')
-
-    authentication.add_option('-u', '--username',
-            dest='username', metavar='USERNAME', help='account username')
-    authentication.add_option('-p', '--password',
-            dest='password', metavar='PASSWORD', help='account password')
-    authentication.add_option('-2', '--twofactor',
-            dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code')
-    authentication.add_option('-n', '--netrc',
-            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
-    authentication.add_option('--video-password',
-            dest='videopassword', metavar='PASSWORD', help='video password (vimeo, smotri)')
-
-
-    video_format.add_option('-f', '--format',
-            action='store', dest='format', metavar='FORMAT', default=None,
-            help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.')
-    video_format.add_option('--all-formats',
-            action='store_const', dest='format', help='download all available video formats', const='all')
-    video_format.add_option('--prefer-free-formats',
-            action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
-    video_format.add_option('--max-quality',
-            action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
-    video_format.add_option('-F', '--list-formats',
-            action='store_true', dest='listformats', help='list all available formats')
-
-    subtitles.add_option('--write-sub', '--write-srt',
-            action='store_true', dest='writesubtitles',
-            help='write subtitle file', default=False)
-    subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
-            action='store_true', dest='writeautomaticsub',
-            help='write automatic subtitle file (youtube only)', default=False)
-    subtitles.add_option('--all-subs',
-            action='store_true', dest='allsubtitles',
-            help='downloads all the available subtitles of the video', default=False)
-    subtitles.add_option('--list-subs',
-            action='store_true', dest='listsubtitles',
-            help='lists all available subtitles for the video', default=False)
-    subtitles.add_option('--sub-format',
-            action='store', dest='subtitlesformat', metavar='FORMAT',
-            help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
-    subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
-            action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
-            default=[], callback=_comma_separated_values_options_callback,
-            help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
-
-    downloader.add_option('-r', '--rate-limit',
-            dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
-    downloader.add_option('-R', '--retries',
-            dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
-    downloader.add_option('--buffer-size',
-            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024")
-    downloader.add_option('--no-resize-buffer',
-            action='store_true', dest='noresizebuffer',
-            help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
-    downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
-
-    workarounds.add_option(
-        '--encoding', dest='encoding', metavar='ENCODING',
-        help='Force the specified encoding (experimental)')
-    workarounds.add_option(
-        '--no-check-certificate', action='store_true',
-        dest='no_check_certificate', default=False,
-        help='Suppress HTTPS certificate validation.')
-    workarounds.add_option(
-        '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
-        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
-    workarounds.add_option(
-        '--user-agent', metavar='UA',
-        dest='user_agent', help='specify a custom user agent')
-    workarounds.add_option(
-        '--referer', metavar='REF',
-        dest='referer', default=None,
-        help='specify a custom referer, use if the video access is restricted to one domain',
-    )
-    workarounds.add_option(
-        '--add-header', metavar='FIELD:VALUE',
-        dest='headers', action='append',
-        help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
-    )
-    workarounds.add_option(
-        '--bidi-workaround', dest='bidi_workaround', action='store_true',
-        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
-
-    verbosity.add_option('-q', '--quiet',
-            action='store_true', dest='quiet', help='activates quiet mode', default=False)
-    verbosity.add_option(
-        '--no-warnings',
-        dest='no_warnings', action='store_true', default=False,
-        help='Ignore warnings')
-    verbosity.add_option('-s', '--simulate',
-            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
-    verbosity.add_option('--skip-download',
-            action='store_true', dest='skip_download', help='do not download the video', default=False)
-    verbosity.add_option('-g', '--get-url',
-            action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
-    verbosity.add_option('-e', '--get-title',
-            action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
-    verbosity.add_option('--get-id',
-            action='store_true', dest='getid', help='simulate, quiet but print id', default=False)
-    verbosity.add_option('--get-thumbnail',
-            action='store_true', dest='getthumbnail',
-            help='simulate, quiet but print thumbnail URL', default=False)
-    verbosity.add_option('--get-description',
-            action='store_true', dest='getdescription',
-            help='simulate, quiet but print video description', default=False)
-    verbosity.add_option('--get-duration',
-            action='store_true', dest='getduration',
-            help='simulate, quiet but print video length', default=False)
-    verbosity.add_option('--get-filename',
-            action='store_true', dest='getfilename',
-            help='simulate, quiet but print output filename', default=False)
-    verbosity.add_option('--get-format',
-            action='store_true', dest='getformat',
-            help='simulate, quiet but print output format', default=False)
-    verbosity.add_option('-j', '--dump-json',
-            action='store_true', dest='dumpjson',
-            help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False)
-    verbosity.add_option('--newline',
-            action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
-    verbosity.add_option('--no-progress',
-            action='store_true', dest='noprogress', help='do not print progress bar', default=False)
-    verbosity.add_option('--console-title',
-            action='store_true', dest='consoletitle',
-            help='display progress in console titlebar', default=False)
-    verbosity.add_option('-v', '--verbose',
-            action='store_true', dest='verbose', help='print various debugging information', default=False)
-    verbosity.add_option('--dump-intermediate-pages',
-            action='store_true', dest='dump_intermediate_pages', default=False,
-            help='print downloaded pages to debug problems (very verbose)')
-    verbosity.add_option('--write-pages',
-            action='store_true', dest='write_pages', default=False,
-            help='Write downloaded intermediary pages to files in the current directory to debug problems')
-    verbosity.add_option('--youtube-print-sig-code',
-            action='store_true', dest='youtube_print_sig_code', default=False,
-            help=optparse.SUPPRESS_HELP)
-    verbosity.add_option('--print-traffic',
-            dest='debug_printtraffic', action='store_true', default=False,
-            help='Display sent and read HTTP traffic')
-
-
-    filesystem.add_option('-a', '--batch-file',
-            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
-    filesystem.add_option('--id',
-            action='store_true', dest='useid', help='use only video ID in file name', default=False)
-    filesystem.add_option('-A', '--auto-number',
-            action='store_true', dest='autonumber',
-            help='number downloaded files starting from 00000', default=False)
-    filesystem.add_option('-o', '--output',
-            dest='outtmpl', metavar='TEMPLATE',
-            help=('output filename template. Use %(title)s to get the title, '
-                  '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
-                  '%(autonumber)s to get an automatically incremented number, '
-                  '%(ext)s for the filename extension, '
-                  '%(format)s for the format description (like "22 - 1280x720" or "HD"), '
-                  '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
-                  '%(upload_date)s for the upload date (YYYYMMDD), '
-                  '%(extractor)s for the provider (youtube, metacafe, etc), '
-                  '%(id)s for the video id, %(playlist)s for the playlist the video is in, '
-                  '%(playlist_index)s for the position in the playlist and %% for a literal percent. '
-                  '%(height)s and %(width)s for the width and height of the video format. '
-                  '%(resolution)s for a textual description of the resolution of the video format. '
-                  'Use - to output to stdout. Can also be used to download to a different directory, '
-                  'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
-    filesystem.add_option('--autonumber-size',
-            dest='autonumber_size', metavar='NUMBER',
-            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
-    filesystem.add_option('--restrict-filenames',
-            action='store_true', dest='restrictfilenames',
-            help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
-    filesystem.add_option('-t', '--title',
-            action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False)
-    filesystem.add_option('-l', '--literal',
-            action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
-    filesystem.add_option('-w', '--no-overwrites',
-            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
-    filesystem.add_option('-c', '--continue',
-            action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
-    filesystem.add_option('--no-continue',
-            action='store_false', dest='continue_dl',
-            help='do not resume partially downloaded files (restart from beginning)')
-    filesystem.add_option('--no-part',
-            action='store_true', dest='nopart', help='do not use .part files', default=False)
-    filesystem.add_option('--no-mtime',
-            action='store_false', dest='updatetime',
-            help='do not use the Last-modified header to set the file modification time', default=True)
-    filesystem.add_option('--write-description',
-            action='store_true', dest='writedescription',
-            help='write video description to a .description file', default=False)
-    filesystem.add_option('--write-info-json',
-            action='store_true', dest='writeinfojson',
-            help='write video metadata to a .info.json file', default=False)
-    filesystem.add_option('--write-annotations',
-            action='store_true', dest='writeannotations',
-            help='write video annotations to a .annotation file', default=False)
-    filesystem.add_option('--write-thumbnail',
-            action='store_true', dest='writethumbnail',
-            help='write thumbnail image to disk', default=False)
-    filesystem.add_option('--load-info',
-            dest='load_info_filename', metavar='FILE',
-            help='json file containing the video information (created with the "--write-json" option)')
-    filesystem.add_option('--cookies',
-            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
-    filesystem.add_option(
-        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
-        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
-    filesystem.add_option(
-        '--no-cache-dir', action='store_const', const=None, dest='cachedir',
-        help='Disable filesystem caching')
-    filesystem.add_option(
-        '--rm-cache-dir', action='store_true', dest='rm_cachedir',
-        help='Delete all filesystem cache files')
-
-
-    postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
-            help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
-    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
-            help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default')
-    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
-            help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
-    postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
-            help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)')
-    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
-            help='keeps the video file on disk after the post-processing; the video is erased by default')
-    postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
-            help='do not overwrite post-processed files; the post-processed files are overwritten by default')
-    postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
-            help='embed subtitles in the video (only for mp4 videos)')
-    postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
-            help='embed thumbnail in the audio as cover art')
-    postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
-            help='write metadata to the video file')
-    postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
-            help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
-    postproc.add_option('--prefer-avconv', action='store_false', dest='prefer_ffmpeg',
-        help='Prefer avconv over ffmpeg for running the postprocessors (default)')
-    postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg',
-        help='Prefer ffmpeg over avconv for running the postprocessors')
-    postproc.add_option(
-        '--exec', metavar='CMD', dest='exec_cmd',
-        help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )
-
-    parser.add_option_group(general)
-    parser.add_option_group(selection)
-    parser.add_option_group(downloader)
-    parser.add_option_group(filesystem)
-    parser.add_option_group(verbosity)
-    parser.add_option_group(workarounds)
-    parser.add_option_group(video_format)
-    parser.add_option_group(subtitles)
-    parser.add_option_group(authentication)
-    parser.add_option_group(postproc)
-
-    if overrideArguments is not None:
-        opts, args = parser.parse_args(overrideArguments)
-        if opts.verbose:
-            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
-    else:
-        commandLineConf = sys.argv[1:]
-        if '--ignore-config' in commandLineConf:
-            systemConf = []
-            userConf = []
-        else:
-            systemConf = _readOptions('/etc/youtube-dl.conf')
-            if '--ignore-config' in systemConf:
-                userConf = []
-            else:
-                userConf = _readUserConf()
-        argv = systemConf + userConf + commandLineConf
-
-        opts, args = parser.parse_args(argv)
-        if opts.verbose:
-            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
-            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
-            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
-
-    return parser, opts, args
-
-
 def _real_main(argv=None):
     # Compatibility fixes for Windows
     if sys.platform == 'win32':
@@ -872,20 +401,7 @@ def _real_main(argv=None):
 
         # Remove cache dir
         if opts.rm_cachedir:
-            if opts.cachedir is None:
-                ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
-            else:
-                if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
-                    ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir')
-                    retcode = 141
-                else:
-                    ydl.to_screen(
-                        u'Removing cache dir %s .' % opts.cachedir,
-                        skip_eol=True)
-                    if os.path.exists(opts.cachedir):
-                        ydl.to_screen(u'.', skip_eol=True)
-                        shutil.rmtree(opts.cachedir)
-                    ydl.to_screen(u'.')
+            ydl.cache.remove()
 
         # Maybe do nothing
         if (len(all_urls) < 1) and (opts.load_info_filename is None):
diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py
new file mode 100644 (file)
index 0000000..79ff09f
--- /dev/null
@@ -0,0 +1,94 @@
+from __future__ import unicode_literals
+
+import errno
+import io
+import json
+import os
+import re
+import shutil
+import traceback
+
+from .utils import (
+    write_json_file,
+)
+
+
+class Cache(object):
+    """Best-effort filesystem cache for extractor data (JSON blobs).
+
+    Entries live at <root>/<section>/<key>.<dtype>, where <root> is the
+    'cachedir' param if set, otherwise $XDG_CACHE_HOME/youtube-dl
+    (falling back to ~/.cache/youtube-dl).
+    """
+
+    def __init__(self, ydl):
+        # ydl: owning YoutubeDL instance, used for params lookup and
+        # for reporting (report_warning / to_screen).
+        self._ydl = ydl
+
+    def _get_root_dir(self):
+        # Explicit 'cachedir' param wins; otherwise follow the XDG Base
+        # Directory convention. expanduser() resolves a leading '~'.
+        res = self._ydl.params.get('cachedir')
+        if res is None:
+            cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache')
+            res = os.path.join(cache_root, 'youtube-dl')
+        return os.path.expanduser(res)
+
+    def _get_cache_fn(self, section, key, dtype):
+        # Restrict section/key to a conservative character set so they
+        # cannot contain path separators. NOTE(review): '..' still
+        # matches the pattern; presumably callers never pass it.
+        assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
+            'invalid section %r' % section
+        assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
+        return os.path.join(
+            self._get_root_dir(), section, '%s.%s' % (key, dtype))
+
+    @property
+    def enabled(self):
+        # Caching is on unless 'cachedir' was explicitly set to False;
+        # None/unset means "use the default location" (see _get_root_dir).
+        return self._ydl.params.get('cachedir') is not False
+
+    def store(self, section, key, data, dtype='json'):
+        """Persist `data` under section/key. Failures only warn, never
+        raise: the cache is strictly best-effort."""
+        assert dtype in ('json',)
+
+        if not self.enabled:
+            return
+
+        fn = self._get_cache_fn(section, key, dtype)
+        try:
+            try:
+                os.makedirs(os.path.dirname(fn))
+            except OSError as ose:
+                # The directory may already exist; re-raise anything else.
+                if ose.errno != errno.EEXIST:
+                    raise
+            write_json_file(data, fn)
+        except Exception:
+            # Swallow and warn with the traceback text; a broken cache
+            # must not abort the download.
+            tb = traceback.format_exc()
+            self._ydl.report_warning(
+                'Writing cache to %r failed: %s' % (fn, tb))
+
+    def load(self, section, key, dtype='json', default=None):
+        """Return the cached value for section/key, or `default` when the
+        cache is disabled, the entry is missing, or the JSON is corrupt."""
+        assert dtype in ('json',)
+
+        if not self.enabled:
+            return default
+
+        cache_fn = self._get_cache_fn(section, key, dtype)
+        try:
+            try:
+                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+                    return json.load(cachef)
+            except ValueError:
+                # Corrupt JSON: include the file size (or the stat error)
+                # in the warning to help diagnose truncated writes.
+                try:
+                    file_size = os.path.getsize(cache_fn)
+                except (OSError, IOError) as oe:
+                    file_size = str(oe)
+                self._ydl.report_warning(
+                    'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
+        except IOError:
+            pass  # No cache available
+
+        return default
+
+    def remove(self):
+        """Delete the entire cache directory, with a sanity check that the
+        path looks like a cache location before rmtree'ing it."""
+        if not self.enabled:
+            self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
+            return
+
+        cachedir = self._get_root_dir()
+        # Refuse to wipe a directory that does not contain 'cache' or
+        # 'tmp' in its path — guards against a mistyped --cache-dir.
+        if not any((term in cachedir) for term in ('cache', 'tmp')):
+            raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
+
+        self._ydl.to_screen(
+            'Removing cache dir %s .' % cachedir, skip_eol=True)
+        if os.path.exists(cachedir):
+            # Progress dots around the (possibly slow) recursive delete.
+            self._ydl.to_screen('.', skip_eol=True)
+            shutil.rmtree(cachedir)
+        self._ydl.to_screen('.')
index 9f29e2f8110ef09d8bba4c1d57e38acb8da8a52e..32852f333a0a6329f48ef2d690555d218d4228c5 100644 (file)
@@ -3,6 +3,7 @@ import subprocess
 
 from .common import FileDownloader
 from ..utils import (
+    check_executable,
     encodeFilename,
 )
 
@@ -19,13 +20,11 @@ class HlsFD(FileDownloader):
             encodeFilename(tmpfilename, for_subprocess=True)]
 
         for program in ['avconv', 'ffmpeg']:
-            try:
-                subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+            if check_executable(program, ['-version']):
                 break
-            except (OSError, IOError):
-                pass
         else:
             self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+            return False
         cmd = [program] + args
 
         retval = subprocess.call(cmd)
@@ -42,5 +41,5 @@ class HlsFD(FileDownloader):
             return True
         else:
             self.to_stderr(u"\n")
-            self.report_error(u'ffmpeg exited with code %d' % retval)
+            self.report_error(u'%s exited with code %d' % (program, retval))
             return False
index d01d1897e411fc5005c15868b46cfdf174c2ca4c..6caf7451ed99a00511c56cc4a7c6cb7711601ff0 100644 (file)
@@ -193,7 +193,8 @@ class HttpFD(FileDownloader):
             self.to_stderr(u"\n")
             self.report_error(u'Did not get any data blocks')
             return False
-        stream.close()
+        if tmpfilename != u'-':
+            stream.close()
         self.report_finish(data_len_str, (time.time() - start))
         if data_len is not None and byte_counter != data_len:
             raise ContentTooShortError(byte_counter, int(data_len))
index 68646709a16cf7c9dcec0ac1c5e09f5643a9a7a2..5eb108302339ec1678458fd8572c219a0980200b 100644 (file)
@@ -8,9 +8,10 @@ import time
 
 from .common import FileDownloader
 from ..utils import (
+    check_executable,
+    compat_str,
     encodeFilename,
     format_bytes,
-    compat_str,
 )
 
 
@@ -103,9 +104,7 @@ class RtmpFD(FileDownloader):
         test = self.params.get('test', False)
 
         # Check for rtmpdump first
-        try:
-            subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
-        except (OSError, IOError):
+        if not check_executable('rtmpdump', ['-h']):
             self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
             return False
 
index 7f0736ee82f9fa7784c4c1e152b18171fe2ab2cb..4b83d8d9964b6c5e73c3968c8a8fe1da974fe36e 100644 (file)
@@ -4,12 +4,13 @@ from .addanime import AddAnimeIE
 from .adultswim import AdultSwimIE
 from .aftonbladet import AftonbladetIE
 from .anitube import AnitubeIE
+from .anysex import AnySexIE
 from .aol import AolIE
 from .allocine import AllocineIE
 from .aparat import AparatIE
 from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
-from .ard import ARDIE
+from .ard import ARDIE, ARDMediathekIE
 from .arte import (
     ArteTvIE,
     ArteTVPlus7IE,
@@ -23,6 +24,7 @@ from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
+from .beeg import BeegIE
 from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
@@ -44,6 +46,7 @@ from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
+from .cloudy import CloudyIE
 from .clubic import ClubicIE
 from .cmt import CMTIE
 from .cnet import CNETIE
@@ -65,9 +68,12 @@ from .dailymotion import (
     DailymotionUserIE,
 )
 from .daum import DaumIE
+from .dbtv import DBTVIE
+from .deezer import DeezerPlaylistIE
 from .dfb import DFBIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
+from .drtuber import DrTuberIE
 from .drtv import DRTVIE
 from .dump import DumpIE
 from .defense import DefenseGouvFrIE
@@ -84,8 +90,9 @@ from .ellentv import (
     EllenTVClipsIE,
 )
 from .elpais import ElPaisIE
-from .empflix import EmpflixIE
+from .empflix import EMPFlixIE
 from .engadget import EngadgetIE
+from .eporner import EpornerIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
@@ -135,6 +142,8 @@ from .grooveshark import GroovesharkIE
 from .hark import HarkIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
+from .hornbunny import HornBunnyIE
+from .hostingbulk import HostingBulkIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .howstuffworks import HowStuffWorksIE
@@ -195,6 +204,7 @@ from .mitele import MiTeleIE
 from .mixcloud import MixcloudIE
 from .mlb import MLBIE
 from .mpora import MporaIE
+from .moevideo import MoeVideoIE
 from .mofosex import MofosexIE
 from .mojvideo import MojvideoIE
 from .mooshare import MooshareIE
@@ -210,6 +220,7 @@ from .mtv import (
     MTVIggyIE,
 )
 from .musicplayon import MusicPlayOnIE
+from .musicvault import MusicVaultIE
 from .muzu import MuzuTVIE
 from .myspace import MySpaceIE
 from .myspass import MySpassIE
@@ -230,6 +241,7 @@ from .niconico import NiconicoIE
 from .ninegag import NineGagIE
 from .noco import NocoIE
 from .normalboots import NormalbootsIE
+from .nosvideo import NosVideoIE
 from .novamov import NovaMovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
@@ -257,6 +269,8 @@ from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
 from .pornhub import PornHubIE
 from .pornotube import PornotubeIE
+from .pornoxo import PornoXOIE
+from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .pyvideo import PyvideoIE
 from .radiofrance import RadioFranceIE
@@ -288,6 +302,7 @@ from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
 from .shared import SharedIE
+from .sharesix import ShareSixIE
 from .sina import SinaIE
 from .slideshare import SlideshareIE
 from .slutload import SlutloadIE
@@ -313,13 +328,15 @@ from .southpark import (
 )
 from .space import SpaceIE
 from .spankwire import SpankwireIE
-from .spiegel import SpiegelIE
+from .spiegel import SpiegelIE, SpiegelArticleIE
 from .spiegeltv import SpiegeltvIE
 from .spike import SpikeIE
+from .sportdeutschland import SportDeutschlandIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
+from .sunporno import SunPornoIE
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
@@ -332,6 +349,7 @@ from .teachingchannel import TeachingChannelIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
+from .telemb import TeleMBIE
 from .tenplay import TenPlayIE
 from .testurl import TestURLIE
 from .tf1 import TF1IE
@@ -339,6 +357,7 @@ from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .tlc import TlcIE, TlcDeIE
+from .tnaflix import TNAFlixIE
 from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
@@ -347,6 +366,7 @@ from .trutube import TruTubeIE
 from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
+from .turbo import TurboIE
 from .tutv import TutvIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE
@@ -364,6 +384,7 @@ from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vesti import VestiIE
 from .vevo import VevoIE
+from .vgtv import VGTVIE
 from .vh1 import VH1IE
 from .viddler import ViddlerIE
 from .videobam import VideoBamIE
@@ -391,6 +412,7 @@ from .vine import (
 from .viki import VikiIE
 from .vk import VKIE
 from .vodlocker import VodlockerIE
+from .vporn import VpornIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
 from .vulture import VultureIE
index 59d3bbba413c3c256a3f77917708fb171e337b14..c983ef0f519c303f880f471d357db4b60657ef17 100644 (file)
@@ -7,6 +7,15 @@ from .common import InfoExtractor
 class AcademicEarthCourseIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
     IE_NAME = 'AcademicEarth:Course'
+    _TEST = {
+        'url': 'http://academicearth.org/playlists/laws-of-nature/',
+        'info_dict': {
+            'id': 'laws-of-nature',
+            'title': 'Laws of Nature',
+            'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
+        },
+        'playlist_count': 4,
+    }
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
index a00bfcb35fc8f61b2192592a57776921d0dee9f6..b4b40f2d4f21432f6b12a883513ae00827af00e5 100644 (file)
@@ -75,7 +75,9 @@ class AdultSwimIE(InfoExtractor):
         video_path = mobj.group('path')
 
         webpage = self._download_webpage(url, video_path)
-        episode_id = self._html_search_regex(r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', webpage, 'episode_id')
+        episode_id = self._html_search_regex(
+            r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
+            webpage, 'episode_id')
         title = self._og_search_title(webpage)
 
         index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
@@ -97,7 +99,9 @@ class AdultSwimIE(InfoExtractor):
             duration = segment_el.attrib.get('duration')
 
             segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
-            idoc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information')
+            idoc = self._download_xml(
+                segment_url, segment_title,
+                'Downloading segment information', 'Unable to download segment information')
 
             formats = []
             file_els = idoc.findall('.//files/file')
diff --git a/youtube_dl/extractor/anysex.py b/youtube_dl/extractor/anysex.py
new file mode 100644 (file)
index 0000000..bc64423
--- /dev/null
@@ -0,0 +1,61 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    int_or_none,
+)
+
+
+class AnySexIE(InfoExtractor):
+    """Extractor for anysex.com video pages (adult content, age_limit 18)."""
+    _VALID_URL = r'https?://(?:www\.)?anysex\.com/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://anysex.com/156592/',
+        'md5': '023e9fbb7f7987f5529a394c34ad3d3d',
+        'info_dict': {
+            'id': '156592',
+            'ext': 'mp4',
+            'title': 'Busty and sexy blondie in her bikini strips for you',
+            'description': 'md5:de9e418178e2931c10b62966474e1383',
+            'categories': ['Erotic'],
+            'duration': 270,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        # The direct media URL is embedded in the page's player setup script.
+        video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
+
+        title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
+        description = self._html_search_regex(
+            r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False)
+
+        # Category names come from the anchor texts of /categories/ links.
+        categories = re.findall(
+            r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage)
+
+        # Duration is rendered as MM:SS; parse_duration converts it to seconds.
+        duration = parse_duration(self._search_regex(
+            r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._html_search_regex(
+            r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'duration': duration,
+            'view_count': view_count,
+            'age_limit': 18,
+        }
index a7bfe5a5c8d8911f6d9c8e3b65762de1a822df57..47f8e415777ee21bfa5e001921077f3c9aaa16af 100644 (file)
@@ -21,7 +21,7 @@ class AolIE(InfoExtractor):
         (?:$|\?)
     '''
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
         'md5': '18ef68f48740e86ae94b98da815eec42',
         'info_dict': {
@@ -30,7 +30,14 @@ class AolIE(InfoExtractor):
             'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
         },
         'add_ie': ['FiveMin'],
-    }
+    }, {
+        'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
+        'info_dict': {
+            'id': '152147',
+            'title': 'Brace Yourself - Today\'s Weirdest News',
+        },
+        'playlist_mincount': 10,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index 7f0da8ab6d5b9f0e62f2af18c74c26185c505259..12457f0f996db46d48823836c50e98048162c83c 100644 (file)
@@ -10,10 +10,15 @@ from ..utils import (
     qualities,
     compat_urllib_parse_urlparse,
     compat_urllib_parse,
+    int_or_none,
+    parse_duration,
+    unified_strdate,
+    xpath_text,
 )
 
 
-class ARDIE(InfoExtractor):
+class ARDMediathekIE(InfoExtractor):
+    IE_NAME = 'ARD:mediathek'
     _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
 
     _TESTS = [{
@@ -128,3 +133,61 @@ class ARDIE(InfoExtractor):
             'formats': formats,
             'thumbnail': thumbnail,
         }
+
+
+class ARDIE(InfoExtractor):
+    """Extractor for daserste.de video pages (metadata from a player XML feed)."""
+    # NOTE(review): this pattern is not a raw string; the escapes happen to
+    # survive normal-string processing, but an r'' prefix would be safer.
+    _VALID_URL = '(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
+    _TEST = {
+        'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
+        'md5': 'd216c3a86493f9322545e045ddc3eb35',
+        'info_dict': {
+            'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge',
+            'id': '100',
+            'ext': 'mp4',
+            'duration': 2600,
+            'title': 'Die Story im Ersten: Mission unter falscher Flagge',
+            'upload_date': '20140804',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+
+        # The player XML lives next to the page URL, with a ~playerXml.xml suffix.
+        player_url = mobj.group('mainurl') + '~playerXml.xml'
+        doc = self._download_xml(player_url, display_id)
+        video_node = doc.find('./video')
+        upload_date = unified_strdate(xpath_text(
+            video_node, './broadcastDate'))
+        thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
+
+        formats = []
+        for a in video_node.findall('.//asset'):
+            f = {
+                'format_id': a.attrib['type'],
+                'width': int_or_none(a.find('./frameWidth').text),
+                'height': int_or_none(a.find('./frameHeight').text),
+                'vbr': int_or_none(a.find('./bitrateVideo').text),
+                'abr': int_or_none(a.find('./bitrateAudio').text),
+                'vcodec': a.find('./codecVideo').text,
+                'tbr': int_or_none(a.find('./totalBitrate').text),
+            }
+            # A non-empty serverPrefix indicates an RTMP-style asset where the
+            # URL and play path are split; otherwise fileName is a direct URL.
+            if a.find('./serverPrefix').text:
+                f['url'] = a.find('./serverPrefix').text
+                f['playpath'] = a.find('./fileName').text
+            else:
+                f['url'] = a.find('./fileName').text
+            formats.append(f)
+        self._sort_formats(formats)
+
+        return {
+            'id': mobj.group('id'),
+            'formats': formats,
+            'display_id': display_id,
+            'title': video_node.find('./title').text,
+            'duration': parse_duration(video_node.find('./duration').text),
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
+        }
+
index 1c72b2ff6f5b4d5a6c01a4a0dfde2b2de6517e04..957d35979d34244f7c4cb4df649a349453bf9123 100644 (file)
@@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor):
 
     def _extract_from_webpage(self, webpage, video_id, lang):
         json_url = self._html_search_regex(
-            r'arte_vp_url="(.*?)"', webpage, 'json vp url')
+            [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
+            webpage, 'json vp url')
         return self._extract_from_json_url(json_url, video_id, lang)
 
     def _extract_from_json_url(self, json_url, video_id, lang):
index ccd31c4c7093d54e86df50a42600d08e12e55005..de5d4faf3b920ddb0f2231f05311b9858dc5ef86 100644 (file)
@@ -59,6 +59,13 @@ class BambuserChannelIE(InfoExtractor):
     _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
     # The maximum number we can get with each request
     _STEP = 50
+    _TEST = {
+        'url': 'http://bambuser.com/channel/pixelversity',
+        'info_dict': {
+            'title': 'pixelversity',
+        },
+        'playlist_mincount': 60,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -73,10 +80,10 @@ class BambuserChannelIE(InfoExtractor):
             req = compat_urllib_request.Request(req_url)
             # Without setting this header, we wouldn't get any result
             req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
-            info_json = self._download_webpage(req, user,
-                'Downloading page %d' % i)
-            results = json.loads(info_json)['result']
-            if len(results) == 0:
+            data = self._download_json(
+                req, user, 'Downloading page %d' % i)
+            results = data['result']
+            if not results:
                 break
             last_id = results[-1]['vid']
             urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
index dcbbdef4346c36c789e49531df1dc602bc35255b..c569aa4d26e8c2f41d8e16bf83cf17c7060fa7b3 100644 (file)
@@ -96,7 +96,7 @@ class BandcampAlbumIE(InfoExtractor):
     IE_NAME = 'Bandcamp:album'
     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
         'playlist': [
             {
@@ -118,7 +118,13 @@ class BandcampAlbumIE(InfoExtractor):
             'playlistend': 2
         },
         'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
-    }
+    }, {
+        'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
+        'info_dict': {
+            'title': 'Hierophany of the Open Grave',
+        },
+        'playlist_mincount': 9,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py
new file mode 100644 (file)
index 0000000..314e37f
--- /dev/null
@@ -0,0 +1,65 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class BeegIE(InfoExtractor):
+    """Extractor for beeg.com video pages (adult content, age_limit 18)."""
+    _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://beeg.com/5416503',
+        'md5': '634526ae978711f6b748fe0dd6c11f57',
+        'info_dict': {
+            'id': '5416503',
+            'ext': 'mp4',
+            'title': 'Sultry Striptease',
+            'description': 'md5:6db3c6177972822aaba18652ff59c773',
+            'categories': list,  # NSFW
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        # The page embeds a JS object literal mapping quality labels to URLs,
+        # e.g. var qualityArr = { '480p': '...', '720p': '...' }.
+        quality_arr = self._search_regex(
+            r'(?s)var\s+qualityArr\s*=\s*{\s*(.+?)\s*}', webpage, 'quality formats')
+
+        # The height is derived from the label by dropping its trailing 'p'.
+        formats = [{
+            'url': fmt[1],
+            'format_id': fmt[0],
+            'height': int(fmt[0][:-1]),
+        } for fmt in re.findall(r"'([^']+)'\s*:\s*'([^']+)'", quality_arr)]
+
+        self._sort_formats(formats)
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')
+        
+        description = self._html_search_regex(
+            r'<meta name="description" content="([^"]*)"',
+            webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'\'previewer.url\'\s*:\s*"([^"]*)"',
+            webpage, 'thumbnail', fatal=False)
+
+        # keywords meta holds a comma-separated category list; keep None (not
+        # an empty list) when the meta tag is absent, matching _TEST above.
+        categories_str = self._html_search_regex(
+            r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
+        categories = (
+            None if categories_str is None
+            else categories_str.split(','))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'formats': formats,
+            'age_limit': 18,
+        }
index 86f0c2861e35f296f594a4ac45bbfe74b799d9e0..4e2960c6260ebf6bf0d242a6b0bfc38baf40b25c 100644 (file)
@@ -28,17 +28,6 @@ class BRIE(InfoExtractor):
                 'duration': 34,
             }
         },
-        {
-            'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html',
-            'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe',
-            'info_dict': {
-                'id': '2c060e69-3a27-4e13-b0f0-668fac17d812',
-                'ext': 'mp4',
-                'title': 'Über den Pass',
-                'description': 'Die Eroberung der Alpen: Über den Pass',
-                'duration': 2588,
-            }
-        },
         {
             'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
             'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
index 822f9a7be1e1c9df23ca0e8fc164a883f706cba1..db48dc24fa2a4698ac0dc8a28033b8cca51d3d44 100644 (file)
@@ -25,7 +25,7 @@ class CBSIE(InfoExtractor):
     }, {
         'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
         'info_dict': {
-            'id': 'P9gjWjelt6iP',
+            'id': 'WWF_5KqY3PK1',
             'ext': 'flv',
             'title': 'Live on Letterman - St. Vincent',
             'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
index a62395d4b727ce917f1ea946b63940b3f52b6bdd..c922f695905d70e4052ddfa5c8f336c01221413b 100644 (file)
@@ -42,7 +42,7 @@ class ChilloutzoneIE(InfoExtractor):
             'id': '85523671',
             'ext': 'mp4',
             'title': 'The Sunday Times - Icons',
-            'description': 'md5:a5f7ff82e2f7a9ed77473fe666954e84',
+            'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}',
             'uploader': 'Us',
             'uploader_id': 'usfilms',
             'upload_date': '20140131'
index 58846e8e7cfacc631015ce32ace23cbd9b8a2576..65c12136a3a636763e5d41fb307beb798ee27b83 100644 (file)
@@ -1,11 +1,13 @@
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
+from ..utils import int_or_none
 
 
-translation_table = {
+_translation_table = {
     'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
     'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
     'y': 'l', 'z': 'i',
@@ -13,6 +15,10 @@ translation_table = {
 }
 
 
+def _decode(s):
+    return ''.join(_translation_table.get(c, c) for c in s)
+
+
 class CliphunterIE(InfoExtractor):
     IE_NAME = 'cliphunter'
 
@@ -22,10 +28,14 @@ class CliphunterIE(InfoExtractor):
     '''
     _TEST = {
         'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
-        'file': '1012420.flv',
-        'md5': '15e7740f30428abf70f4223478dc1225',
+        'md5': 'a2ba71eebf523859fe527a61018f723e',
         'info_dict': {
+            'id': '1012420',
+            'ext': 'mp4',
             'title': 'Fun Jynx Maze solo',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'age_limit': 18,
+            'duration': 1317,
         }
     }
 
@@ -35,22 +45,55 @@ class CliphunterIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        video_title = self._search_regex(
+            r'mediaTitle = "([^"]+)"', webpage, 'title')
+
         pl_fiji = self._search_regex(
             r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
         pl_c_qual = self._search_regex(
             r'pl_c_qual = "(.)"', webpage, 'video quality')
-        video_title = self._search_regex(
-            r'mediaTitle = "([^"]+)"', webpage, 'title')
-
-        video_url = ''.join(translation_table.get(c, c) for c in pl_fiji)
-
+        video_url = _decode(pl_fiji)
         formats = [{
             'url': video_url,
-            'format_id': pl_c_qual,
+            'format_id': 'default-%s' % pl_c_qual,
         }]
 
+        qualities_json = self._search_regex(
+            r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info')
+        qualities_data = json.loads(qualities_json)
+
+        for i, t in enumerate(
+                re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)):
+            quality_id, crypted_url = t
+            video_url = _decode(crypted_url)
+            f = {
+                'format_id': quality_id,
+                'url': video_url,
+                'quality': i,
+            }
+            if quality_id in qualities_data:
+                qd = qualities_data[quality_id]
+                m = re.match(
+                    r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b>
+                        \s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd)
+                if m:
+                    f['width'] = int(m.group('width'))
+                    f['height'] = int(m.group('height'))
+                    f['tbr'] = int(m.group('tbr'))
+            formats.append(f)
+        self._sort_formats(formats)
+
+        thumbnail = self._search_regex(
+            r"var\s+mov_thumb\s*=\s*'([^']+)';",
+            webpage, 'thumbnail', fatal=False)
+        duration = int_or_none(self._search_regex(
+            r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False))
+
         return {
             'id': video_id,
             'title': video_title,
             'formats': formats,
+            'duration': duration,
+            'age_limit': self._rta_search(webpage),
+            'thumbnail': thumbnail,
         }
diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py
new file mode 100644 (file)
index 0000000..386f080
--- /dev/null
@@ -0,0 +1,108 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_parse_qs,
+    compat_urllib_parse,
+    remove_end,
+    HEADRequest,
+    compat_HTTPError,
+)
+
+
+class CloudyIE(InfoExtractor):
+    """Extractor for cloudy.ec and videoraj.ch embed-based video hosts."""
+    _IE_DESC = 'cloudy.ec and videoraj.ch'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/
+        (?:v/|embed\.php\?id=)
+        (?P<id>[A-Za-z0-9]+)
+        '''
+    _EMBED_URL = 'http://www.%s/embed.php?id=%s'
+    _API_URL = 'http://www.%s/api/player.api.php?%s'
+    # Maximum number of player-API attempts before giving up (see
+    # _extract_video, which retries once after a dead video URL).
+    _MAX_TRIES = 2
+    _TESTS = [
+        {
+            'url': 'https://www.cloudy.ec/v/af511e2527aac',
+            'md5': '5cb253ace826a42f35b4740539bedf07',
+            'info_dict': {
+                'id': 'af511e2527aac',
+                'ext': 'flv',
+                'title': 'Funny Cats and Animals Compilation june 2013',
+            }
+        },
+        {
+            'url': 'http://www.videoraj.ch/v/47f399fd8bb60',
+            'md5': '7d0f8799d91efd4eda26587421c3c3b0',
+            'info_dict': {
+                'id': '47f399fd8bb60',
+                'ext': 'flv',
+                'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?',
+            }
+        }
+    ]
+
+    def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
+        """Query the player API; recurses (up to _MAX_TRIES) when the
+        returned video URL responds with HTTP 404/410."""
+
+        if try_num > self._MAX_TRIES - 1:
+            raise ExtractorError('Unable to extract video URL', expected=True)
+
+        form = {
+            'file': video_id,
+            'key': file_key,
+        }
+
+        # On retry, the API expects error bookkeeping fields describing the
+        # previously failed URL.
+        if error_url:
+            form.update({
+                'numOfErrors': try_num,
+                'errorCode': '404',
+                'errorUrl': error_url,
+            })
+
+        data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form))
+        player_data = self._download_webpage(
+            data_url, video_id, 'Downloading player data')
+        # The API answers in application/x-www-form-urlencoded format, so
+        # every parsed value below is a list of strings.
+        data = compat_parse_qs(player_data)
+
+        try_num += 1
+
+        if 'error' in data:
+            raise ExtractorError(
+                '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
+                expected=True)
+
+        title = data.get('title', [None])[0]
+        if title:
+            # The API appends a junk '&asdasdas' suffix to titles; strip it.
+            title = remove_end(title, '&asdasdas').strip()
+
+        video_url = data.get('url', [None])[0]
+
+        if video_url:
+            # Probe the URL with a HEAD request; a dead link (404/410)
+            # triggers one more API round-trip with the error fields set.
+            try:
+                self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
+                    # NOTE(review): report_warning is called here with a second
+                    # positional argument (video_id) — confirm the base-class
+                    # signature actually accepts it.
+                    self.report_warning('Invalid video URL, requesting another', video_id)
+                    return self._extract_video(video_host, video_id, file_key, video_url, try_num)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+        }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_host = mobj.group('host')
+        video_id = mobj.group('id')
+
+        # Always go through the embed page, which exposes the file key the
+        # player API requires.
+        url = self._EMBED_URL % (video_host, video_id)
+        webpage = self._download_webpage(url, video_id)
+
+        file_key = self._search_regex(
+            r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
+
+        return self._extract_video(video_host, video_id, file_key)
index c81ce5a96f03b539d2f5e98975218fcdd0ed861d..035046120152f264278b4edc4bd5b11e0183da98 100644 (file)
@@ -43,14 +43,14 @@ class ComedyCentralShowsIE(InfoExtractor):
                           (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
                          ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
                           (?P<clip>
-                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+))
+                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
                               |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                               |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                           )|
                           (?P<interview>
                               extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
                      (?:[?#].*|$)'''
-    _TEST = {
+    _TESTS = [{
         'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
         'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
         'info_dict': {
@@ -61,7 +61,34 @@ class ComedyCentralShowsIE(InfoExtractor):
             'uploader': 'thedailyshow',
             'title': 'thedailyshow kristen-stewart part 1',
         }
-    }
+    }, {
+        'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
+        'only_matching': True,
+    }, {
+        'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
+        'only_matching': True,
+    }, {
+        'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
+        'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
+        'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
+        'only_matching': True,
+    }, {
+        'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
+        'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
+        'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
+        'only_matching': True,
+    }, {
+        'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
+        'only_matching': True,
+    }]
 
     _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
 
@@ -185,6 +212,9 @@ class ComedyCentralShowsIE(InfoExtractor):
                     'ext': self._video_extensions.get(format, 'mp4'),
                     'height': h,
                     'width': w,
+
+                    'format_note': 'HTTP 400 at the moment (patches welcome!)',
+                    'preference': -100,
                 })
                 formats.append({
                     'format_id': 'rtmp-%s' % format,
index 69d5f687cbcfc913c1ee8ae3d8bc0a530b7202f0..929dd1e97efd70e5699dc333d222fe7a97a8de9a 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import base64
 import hashlib
 import json
@@ -114,7 +116,7 @@ class InfoExtractor(object):
     upload_date:    Video upload date (YYYYMMDD).
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
-    location:       Physical location of the video.
+    location:       Physical location where the video was filmed.
     subtitles:      The subtitle file contents as a dictionary in the format
                     {language: subtitles}.
     duration:       Length of the video in seconds, as an integer.
@@ -202,17 +204,17 @@ class InfoExtractor(object):
             self.report_download_webpage(video_id)
         elif note is not False:
             if video_id is None:
-                self.to_screen(u'%s' % (note,))
+                self.to_screen('%s' % (note,))
             else:
-                self.to_screen(u'%s: %s' % (video_id, note))
+                self.to_screen('%s: %s' % (video_id, note))
         try:
             return self._downloader.urlopen(url_or_request)
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             if errnote is False:
                 return False
             if errnote is None:
-                errnote = u'Unable to download webpage'
-            errmsg = u'%s: %s' % (errnote, compat_str(err))
+                errnote = 'Unable to download webpage'
+            errmsg = '%s: %s' % (errnote, compat_str(err))
             if fatal:
                 raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
             else:
@@ -249,7 +251,7 @@ class InfoExtractor(object):
                 url = url_or_request.get_full_url()
             except AttributeError:
                 url = url_or_request
-            self.to_screen(u'Dumping request to ' + url)
+            self.to_screen('Dumping request to ' + url)
             dump = base64.b64encode(webpage_bytes).decode('ascii')
             self._downloader.to_screen(dump)
         if self._downloader.params.get('write_pages', False):
@@ -259,11 +261,11 @@ class InfoExtractor(object):
                 url = url_or_request
             basen = '%s_%s' % (video_id, url)
             if len(basen) > 240:
-                h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+                h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
                 basen = basen[:240 - len(h)] + h
             raw_filename = basen + '.dump'
             filename = sanitize_filename(raw_filename, restricted=True)
-            self.to_screen(u'Saving request to ' + filename)
+            self.to_screen('Saving request to ' + filename)
             with open(filename, 'wb') as outf:
                 outf.write(webpage_bytes)
 
@@ -272,14 +274,14 @@ class InfoExtractor(object):
         except LookupError:
             content = webpage_bytes.decode('utf-8', 'replace')
 
-        if (u'<title>Access to this site is blocked</title>' in content and
-                u'Websense' in content[:512]):
-            msg = u'Access to this webpage has been blocked by Websense filtering software in your network.'
+        if ('<title>Access to this site is blocked</title>' in content and
+                'Websense' in content[:512]):
+            msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
             blocked_iframe = self._html_search_regex(
                 r'<iframe src="([^"]+)"', content,
-                u'Websense information URL', default=None)
+                'Websense information URL', default=None)
             if blocked_iframe:
-                msg += u' Visit %s for more details' % blocked_iframe
+                msg += ' Visit %s for more details' % blocked_iframe
             raise ExtractorError(msg, expected=True)
 
         return (content, urlh)
@@ -294,7 +296,7 @@ class InfoExtractor(object):
             return content
 
     def _download_xml(self, url_or_request, video_id,
-                      note=u'Downloading XML', errnote=u'Unable to download XML',
+                      note='Downloading XML', errnote='Unable to download XML',
                       transform_source=None, fatal=True):
         """Return the xml as an xml.etree.ElementTree.Element"""
         xml_string = self._download_webpage(
@@ -306,8 +308,8 @@ class InfoExtractor(object):
         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
 
     def _download_json(self, url_or_request, video_id,
-                       note=u'Downloading JSON metadata',
-                       errnote=u'Unable to download JSON metadata',
+                       note='Downloading JSON metadata',
+                       errnote='Unable to download JSON metadata',
                        transform_source=None,
                        fatal=True):
         json_string = self._download_webpage(
@@ -322,29 +324,29 @@ class InfoExtractor(object):
             raise ExtractorError('Failed to download JSON', cause=ve)
 
     def report_warning(self, msg, video_id=None):
-        idstr = u'' if video_id is None else u'%s: ' % video_id
+        idstr = '' if video_id is None else '%s: ' % video_id
         self._downloader.report_warning(
-            u'[%s] %s%s' % (self.IE_NAME, idstr, msg))
+            '[%s] %s%s' % (self.IE_NAME, idstr, msg))
 
     def to_screen(self, msg):
         """Print msg to screen, prefixing it with '[ie_name]'"""
-        self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
+        self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
 
     def report_extraction(self, id_or_name):
         """Report information extraction."""
-        self.to_screen(u'%s: Extracting information' % id_or_name)
+        self.to_screen('%s: Extracting information' % id_or_name)
 
     def report_download_webpage(self, video_id):
         """Report webpage download."""
-        self.to_screen(u'%s: Downloading webpage' % video_id)
+        self.to_screen('%s: Downloading webpage' % video_id)
 
     def report_age_confirmation(self):
         """Report attempt to confirm age."""
-        self.to_screen(u'Confirming age')
+        self.to_screen('Confirming age')
 
     def report_login(self):
         """Report attempt to log in."""
-        self.to_screen(u'Logging in')
+        self.to_screen('Logging in')
 
     #Methods for following #608
     @staticmethod
@@ -384,7 +386,7 @@ class InfoExtractor(object):
                     break
 
         if os.name != 'nt' and sys.stderr.isatty():
-            _name = u'\033[0;34m%s\033[0m' % name
+            _name = '\033[0;34m%s\033[0m' % name
         else:
             _name = name
 
@@ -394,10 +396,10 @@ class InfoExtractor(object):
         elif default is not _NO_DEFAULT:
             return default
         elif fatal:
-            raise RegexNotFoundError(u'Unable to extract %s' % _name)
+            raise RegexNotFoundError('Unable to extract %s' % _name)
         else:
-            self._downloader.report_warning(u'unable to extract %s; '
-                u'please report this issue on http://yt-dl.org/bug' % _name)
+            self._downloader.report_warning('unable to extract %s; '
+                'please report this issue on http://yt-dl.org/bug' % _name)
             return None
 
     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
@@ -436,7 +438,7 @@ class InfoExtractor(object):
                 else:
                     raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
             except (IOError, netrc.NetrcParseError) as err:
-                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
+                self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))
         
         return (username, password)
 
@@ -476,7 +478,7 @@ class InfoExtractor(object):
         return unescapeHTML(escaped)
 
     def _og_search_thumbnail(self, html, **kargs):
-        return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)
+        return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs)
 
     def _og_search_description(self, html, **kargs):
         return self._og_search_property('description', html, fatal=False, **kargs)
@@ -535,7 +537,7 @@ class InfoExtractor(object):
 
     def _sort_formats(self, formats):
         if not formats:
-            raise ExtractorError(u'No video formats found')
+            raise ExtractorError('No video formats found')
 
         def _formats_key(f):
             # TODO remove the following workaround
@@ -555,9 +557,9 @@ class InfoExtractor(object):
 
             if f.get('vcodec') == 'none':  # audio only
                 if self._downloader.params.get('prefer_free_formats'):
-                    ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus']
+                    ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
                 else:
-                    ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a']
+                    ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
                 ext_preference = 0
                 try:
                     audio_ext_preference = ORDER.index(f['ext'])
@@ -565,9 +567,9 @@ class InfoExtractor(object):
                     audio_ext_preference = -1
             else:
                 if self._downloader.params.get('prefer_free_formats'):
-                    ORDER = [u'flv', u'mp4', u'webm']
+                    ORDER = ['flv', 'mp4', 'webm']
                 else:
-                    ORDER = [u'webm', u'flv', u'mp4']
+                    ORDER = ['webm', 'flv', 'mp4']
                 try:
                     ext_preference = ORDER.index(f['ext'])
                 except ValueError:
@@ -609,7 +611,7 @@ class InfoExtractor(object):
 
     def _sleep(self, timeout, video_id, msg_template=None):
         if msg_template is None:
-            msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
+            msg_template = '%(video_id)s: Waiting for %(timeout)s seconds'
         msg = msg_template % {'video_id': video_id, 'timeout': timeout}
         self.to_screen(msg)
         time.sleep(timeout)
@@ -636,6 +638,61 @@ class InfoExtractor(object):
 
         return formats
 
+    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
+        formats = [{
+            'format_id': 'm3u8-meta',
+            'url': m3u8_url,
+            'ext': ext,
+            'protocol': 'm3u8',
+            'preference': -1,
+            'resolution': 'multiple',
+            'format_note': 'Quality selection URL',
+        }]
+
+        m3u8_doc = self._download_webpage(m3u8_url, video_id)
+        last_info = None
+        kv_rex = re.compile(
+            r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
+        for line in m3u8_doc.splitlines():
+            if line.startswith('#EXT-X-STREAM-INF:'):
+                last_info = {}
+                for m in kv_rex.finditer(line):
+                    v = m.group('val')
+                    if v.startswith('"'):
+                        v = v[1:-1]
+                    last_info[m.group('key')] = v
+            elif line.startswith('#') or not line.strip():
+                continue
+            else:
+                if last_info is None:
+                    formats.append({'url': line})
+                    continue
+                tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
+
+                f = {
+                    'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
+                    'url': line.strip(),
+                    'tbr': tbr,
+                    'ext': ext,
+                }
+                codecs = last_info.get('CODECS')
+                if codecs:
+                    # TODO: it looks like the video codec does not necessarily always come first
+                    va_codecs = codecs.split(',')
+                    if va_codecs[0]:
+                        f['vcodec'] = va_codecs[0].partition('.')[0]
+                    if len(va_codecs) > 1 and va_codecs[1]:
+                        f['acodec'] = va_codecs[1].partition('.')[0]
+                resolution = last_info.get('RESOLUTION')
+                if resolution:
+                    width_str, height_str = resolution.split('x')
+                    f['width'] = int(width_str)
+                    f['height'] = int(height_str)
+                formats.append(f)
+                last_info = {}
+        self._sort_formats(formats)
+        return formats
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
@@ -655,7 +712,7 @@ class SearchInfoExtractor(InfoExtractor):
     def _real_extract(self, query):
         mobj = re.match(self._make_valid_url(), query)
         if mobj is None:
-            raise ExtractorError(u'Invalid search query "%s"' % query)
+            raise ExtractorError('Invalid search query "%s"' % query)
 
         prefix = mobj.group('prefix')
         query = mobj.group('query')
@@ -666,9 +723,9 @@ class SearchInfoExtractor(InfoExtractor):
         else:
             n = int(prefix)
             if n <= 0:
-                raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
+                raise ExtractorError('invalid download number %s for query "%s"' % (n, query))
             elif n > self._MAX_RESULTS:
-                self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
+                self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
                 n = self._MAX_RESULTS
             return self._get_n_results(query, n)
 
index 026a9177e754de7d606961e6e4793af86da49fe2..4903764f7008ec6f22c054cda2c73557c44b2a91 100644 (file)
@@ -5,6 +5,7 @@ import re
 import json
 import base64
 import zlib
+import xml.etree.ElementTree
 
 from hashlib import sha1
 from math import pow, sqrt, floor
@@ -17,6 +18,7 @@ from ..utils import (
     intlist_to_bytes,
     unified_strdate,
     clean_html,
+    urlencode_postdata,
 )
 from ..aes import (
     aes_cbc_decrypt,
@@ -51,6 +53,26 @@ class CrunchyrollIE(InfoExtractor):
         '1080': ('80', '108'),
     }
 
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+        self.report_login()
+        login_url = 'https://www.crunchyroll.com/?a=formhandler'
+        data = urlencode_postdata({
+            'formname': 'RpcApiUser_Login',
+            'name': username,
+            'password': password,
+        })
+        login_request = compat_urllib_request.Request(login_url, data)
+        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        self._download_webpage(login_request, None, False, 'Wrong login info')
+
+
+    def _real_initialize(self):
+        self._login()
+
+
     def _decrypt_subtitles(self, data, iv, id):
         data = bytes_to_intlist(data)
         iv = bytes_to_intlist(iv)
@@ -97,6 +119,75 @@ class CrunchyrollIE(InfoExtractor):
             output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
         return output
 
+    def _convert_subtitles_to_ass(self, subtitles):
+        output = ''
+
+        def ass_bool(strvalue):
+            assvalue = '0'
+            if strvalue == '1':
+                assvalue = '-1'
+            return assvalue
+
+        sub_root = xml.etree.ElementTree.fromstring(subtitles)
+        if not sub_root:
+            return output
+
+        output = '[Script Info]\n'
+        output += 'Title: %s\n' % sub_root.attrib["title"]
+        output += 'ScriptType: v4.00+\n'
+        output += 'WrapStyle: %s\n' % sub_root.attrib["wrap_style"]
+        output += 'PlayResX: %s\n' % sub_root.attrib["play_res_x"]
+        output += 'PlayResY: %s\n' % sub_root.attrib["play_res_y"]
+        output += """ScaledBorderAndShadow: yes
+
+[V4+ Styles]
+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+"""
+        for style in sub_root.findall('./styles/style'):
+            output += 'Style: ' + style.attrib["name"]
+            output += ',' + style.attrib["font_name"]
+            output += ',' + style.attrib["font_size"]
+            output += ',' + style.attrib["primary_colour"]
+            output += ',' + style.attrib["secondary_colour"]
+            output += ',' + style.attrib["outline_colour"]
+            output += ',' + style.attrib["back_colour"]
+            output += ',' + ass_bool(style.attrib["bold"])
+            output += ',' + ass_bool(style.attrib["italic"])
+            output += ',' + ass_bool(style.attrib["underline"])
+            output += ',' + ass_bool(style.attrib["strikeout"])
+            output += ',' + style.attrib["scale_x"]
+            output += ',' + style.attrib["scale_y"]
+            output += ',' + style.attrib["spacing"]
+            output += ',' + style.attrib["angle"]
+            output += ',' + style.attrib["border_style"]
+            output += ',' + style.attrib["outline"]
+            output += ',' + style.attrib["shadow"]
+            output += ',' + style.attrib["alignment"]
+            output += ',' + style.attrib["margin_l"]
+            output += ',' + style.attrib["margin_r"]
+            output += ',' + style.attrib["margin_v"]
+            output += ',' + style.attrib["encoding"]
+            output += '\n'
+
+        output += """
+[Events]
+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+"""
+        for event in sub_root.findall('./events/event'):
+            output += 'Dialogue: 0'
+            output += ',' + event.attrib["start"]
+            output += ',' + event.attrib["end"]
+            output += ',' + event.attrib["style"]
+            output += ',' + event.attrib["name"]
+            output += ',' + event.attrib["margin_l"]
+            output += ',' + event.attrib["margin_r"]
+            output += ',' + event.attrib["margin_v"]
+            output += ',' + event.attrib["effect"]
+            output += ',' + event.attrib["text"]
+            output += '\n'
+
+        return output
+
     def _real_extract(self,url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('video_id')
@@ -158,6 +249,7 @@ class CrunchyrollIE(InfoExtractor):
             })
 
         subtitles = {}
+        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
         for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
             sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
                                               video_id, note='Downloading subtitles for '+sub_name)
@@ -174,7 +266,10 @@ class CrunchyrollIE(InfoExtractor):
             lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
             if not lang_code:
                 continue
-            subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
+            if sub_format == 'ass':
+                subtitles[lang_code] = self._convert_subtitles_to_ass(subtitle)
+            else:
+                subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
 
         return {
             'id':          video_id,
index b6552c542411c2abf639e71c955c66c34db2b007..5411066846eb94b9c9295bae4f8860e07112b2d1 100644 (file)
@@ -34,6 +34,13 @@ class CSpanIE(InfoExtractor):
             'title': 'International Health Care Models',
             'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
         }
+    }, {
+        'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
+        'info_dict': {
+            'id': '342759',
+            'title': 'General Motors Ignition Switch Recall',
+        },
+        'playlist_duration_sum': 14855,
     }]
 
     def _real_extract(self, url):
index 5d0bfe454c9bfe1a3e16d1273b18ed3be2f436b8..66a8f16d99da8abebc9ffec39b9679b74566a068 100644 (file)
@@ -1,3 +1,6 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
 import re
 import json
 import itertools
@@ -28,51 +31,53 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
     """Information Extractor for Dailymotion"""
 
     _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
-    IE_NAME = u'dailymotion'
+    IE_NAME = 'dailymotion'
 
     _FORMATS = [
-        (u'stream_h264_ld_url', u'ld'),
-        (u'stream_h264_url', u'standard'),
-        (u'stream_h264_hq_url', u'hq'),
-        (u'stream_h264_hd_url', u'hd'),
-        (u'stream_h264_hd1080_url', u'hd180'),
+        ('stream_h264_ld_url', 'ld'),
+        ('stream_h264_url', 'standard'),
+        ('stream_h264_hq_url', 'hq'),
+        ('stream_h264_hd_url', 'hd'),
+        ('stream_h264_hd1080_url', 'hd180'),
     ]
 
     _TESTS = [
         {
-            u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
-            u'file': u'x33vw9.mp4',
-            u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
-            u'info_dict': {
-                u"uploader": u"Amphora Alex and Van .", 
-                u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
+            'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
+            'md5': '392c4b85a60a90dc4792da41ce3144eb',
+            'info_dict': {
+                'id': 'x33vw9',
+                'ext': 'mp4',
+                'uploader': 'Amphora Alex and Van .',
+                'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
             }
         },
         # Vevo video
         {
-            u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
-            u'file': u'USUV71301934.mp4',
-            u'info_dict': {
-                u'title': u'Roar (Official)',
-                u'uploader': u'Katy Perry',
-                u'upload_date': u'20130905',
+            'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
+            'info_dict': {
+                'title': 'Roar (Official)',
+                'id': 'USUV71301934',
+                'ext': 'mp4',
+                'uploader': 'Katy Perry',
+                'upload_date': '20130905',
             },
-            u'params': {
-                u'skip_download': True,
+            'params': {
+                'skip_download': True,
             },
-            u'skip': u'VEVO is only available in some countries',
+            'skip': 'VEVO is only available in some countries',
         },
         # age-restricted video
         {
-            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
-            u'file': u'xyh2zz.mp4',
-            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
-            u'info_dict': {
-                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
-                u'uploader': 'HotWaves1012',
-                u'age_limit': 18,
+            'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
+            'md5': '0d667a7b9cebecc3c89ee93099c4159d',
+            'info_dict': {
+                'id': 'xyh2zz',
+                'ext': 'mp4',
+                'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
+                'uploader': 'HotWaves1012',
+                'age_limit': 18,
             }
-
         }
     ]
 
@@ -97,8 +102,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
             webpage)
         if m_vevo is not None:
             vevo_id = m_vevo.group('id')
-            self.to_screen(u'Vevo video detected: %s' % vevo_id)
-            return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')
+            self.to_screen('Vevo video detected: %s' % vevo_id)
+            return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
 
         age_limit = self._rta_search(webpage)
 
@@ -109,7 +114,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
 
         embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
         embed_page = self._download_webpage(embed_url, video_id,
-                                            u'Downloading embed page')
+                                            'Downloading embed page')
         info = self._search_regex(r'var info = ({.*?}),$', embed_page,
             'video info', flags=re.MULTILINE)
         info = json.loads(info)
@@ -134,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
                     'height': height,
                 })
         if not formats:
-            raise ExtractorError(u'Unable to extract video URL')
+            raise ExtractorError('Unable to extract video URL')
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, webpage)
@@ -143,7 +148,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
             return
 
         view_count = self._search_regex(
-            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
+            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, 'view count', fatal=False)
         if view_count is not None:
             view_count = str_to_int(view_count)
 
@@ -165,28 +170,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
                 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
                 video_id, note=False)
         except ExtractorError as err:
-            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
             return {}
         info = json.loads(sub_list)
         if (info['total'] > 0):
             sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
             return sub_lang_list
-        self._downloader.report_warning(u'video doesn\'t have subtitles')
+        self._downloader.report_warning('video doesn\'t have subtitles')
         return {}
 
 
 class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
-    IE_NAME = u'dailymotion:playlist'
+    IE_NAME = 'dailymotion:playlist'
     _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
     _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
     _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
+    _TESTS = [{
+        'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
+        'info_dict': {
+            'title': 'SPORT',
+        },
+        'playlist_mincount': 20,
+    }]
 
     def _extract_entries(self, id):
         video_ids = []
         for pagenum in itertools.count(1):
             request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
             webpage = self._download_webpage(request,
-                                             id, u'Downloading page %s' % pagenum)
+                                             id, 'Downloading page %s' % pagenum)
 
             video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
 
@@ -209,9 +221,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
 
 
 class DailymotionUserIE(DailymotionPlaylistIE):
-    IE_NAME = u'dailymotion:user'
+    IE_NAME = 'dailymotion:user'
     _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
     _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
+    _TESTS = [{
+        'url': 'https://www.dailymotion.com/user/nqtv',
+        'info_dict': {
+            'id': 'nqtv',
+            'title': 'Rémi Gaillard',
+        },
+        'playlist_mincount': 100,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -219,7 +239,7 @@ class DailymotionUserIE(DailymotionPlaylistIE):
         webpage = self._download_webpage(url, user)
         full_user = unescapeHTML(self._html_search_regex(
             r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
-            webpage, u'user', flags=re.DOTALL))
+            webpage, 'user'))
 
         return {
             '_type': 'playlist',
index 6033cd94a1b251d66e7a3f80034bc58b79fa4b55..45d66e2e663fa376cec8f4fc7931e84006ee30b9 100644 (file)
@@ -11,10 +11,10 @@ from ..utils import (
 
 
 class DaumIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:v/|.*?clipid=)(?P<id>[^?#&]+)'
     IE_NAME = 'daum.net'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
         'info_dict': {
             'id': '52554690',
@@ -24,11 +24,17 @@ class DaumIE(InfoExtractor):
             'upload_date': '20130831',
             'duration': 3868,
         },
-    }
+    }, {
+        'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
+        'only_matching': True,
+    }, {
+        'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')
         canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
         webpage = self._download_webpage(canonical_url, video_id)
         full_id = self._search_regex(
@@ -42,7 +48,6 @@ class DaumIE(InfoExtractor):
             'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
             video_id, 'Downloading video formats info')
 
-        self.to_screen(u'%s: Getting video urls' % video_id)
         formats = []
         for format_el in urls.findall('result/output_list/output_list'):
             profile = format_el.attrib['profile']
@@ -52,7 +57,7 @@ class DaumIE(InfoExtractor):
             })
             url_doc = self._download_xml(
                 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
-                video_id, note=False)
+                video_id, note='Downloading video data for %s format' % profile)
             format_url = url_doc.find('result/url').text
             formats.append({
                 'url': format_url,
diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py
new file mode 100644 (file)
index 0000000..1d3e2ff
--- /dev/null
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    clean_html,
+)
+
+
+class DBTVIE(InfoExtractor):
+    _VALID_URL = r'http://dbtv\.no/(?P<id>[0-9]+)#(?P<display_id>.+)'
+    _TEST = {
+        'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
+        'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
+        'info_dict': {
+            'id': '33100',
+            'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
+            'ext': 'mp4',
+            'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
+            'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'timestamp': 1404039863.438,
+            'upload_date': '20140629',
+            'duration': 69.544,
+            'view_count': int,
+            'categories': list,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        data = self._download_json(
+            'http://api.dbtv.no/discovery/%s' % video_id, display_id)
+
+        video = data['playlist'][0]
+
+        formats = [{
+            'url': f['URL'],
+            'vcodec': f.get('container'),
+            'width': int_or_none(f.get('width')),
+            'height': int_or_none(f.get('height')),
+            'vbr': float_or_none(f.get('rate'), 1000),
+            'filesize': int_or_none(f.get('size')),
+        } for f in video['renditions'] if 'URL' in f]
+
+        if not formats:
+            for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]:
+                if url_key in video:
+                    formats.append({
+                        'url': video[url_key],
+                        'format_id': format_id,
+                    })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video['id'],
+            'display_id': display_id,
+            'title': video['title'],
+            'description': clean_html(video['desc']),
+            'thumbnail': video.get('splash') or video.get('thumb'),
+            'timestamp': float_or_none(video.get('publishedAt'), 1000),
+            'duration': float_or_none(video.get('length'), 1000),
+            'view_count': int_or_none(video.get('views')),
+            'categories': video.get('tags'),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py
new file mode 100644 (file)
index 0000000..c3205ff
--- /dev/null
@@ -0,0 +1,89 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    orderedSet,
+)
+
+
+class DeezerPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?deezer\.com/playlist/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.deezer.com/playlist/176747451',
+        'info_dict': {
+            'id': '176747451',
+            'title': 'Best!',
+            'uploader': 'Anonymous',
+            'thumbnail': 're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
+        },
+        'playlist_count': 30,
+        'skip': 'Only available in .de',
+    }
+
+    def _real_extract(self, url):
+        if 'test' not in self._downloader.params:
+            self._downloader.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')
+
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, playlist_id)
+        geoblocking_msg = self._html_search_regex(
+            r'<p class="soon-txt">(.*?)</p>', webpage, 'geoblocking message',
+            default=None)
+        if geoblocking_msg is not None:
+            raise ExtractorError(
+                'Deezer said: %s' % geoblocking_msg, expected=True)
+
+        data_json = self._search_regex(
+            r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n', webpage, 'data JSON')
+        data = json.loads(data_json)
+
+        playlist_title = data.get('DATA', {}).get('TITLE')
+        playlist_uploader = data.get('DATA', {}).get('PARENT_USERNAME')
+        playlist_thumbnail = self._search_regex(
+            r'<img id="naboo_playlist_image".*?src="([^"]+)"', webpage,
+            'playlist thumbnail')
+
+        preview_pattern = self._search_regex(
+            r"var SOUND_PREVIEW_GATEWAY\s*=\s*'([^']+)';", webpage,
+            'preview URL pattern', fatal=False)
+        entries = []
+        for s in data['SONGS']['data']:
+            puid = s['MD5_ORIGIN']
+            preview_video_url = preview_pattern.\
+                replace('{0}', puid[0]).\
+                replace('{1}', puid).\
+                replace('{2}', s['MEDIA_VERSION'])
+            formats = [{
+                'format_id': 'preview',
+                'url': preview_video_url,
+                'preference': -100,  # Only the first 30 seconds
+                'ext': 'mp3',
+            }]
+            self._sort_formats(formats)
+            artists = ', '.join(
+                orderedSet(a['ART_NAME'] for a in s['ARTISTS']))
+            entries.append({
+                'id': s['SNG_ID'],
+                'duration': int_or_none(s.get('DURATION')),
+                'title': '%s - %s' % (artists, s['SNG_TITLE']),
+                'uploader': s['ART_NAME'],
+                'uploader_id': s['ART_ID'],
+                'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0,
+                'formats': formats,
+            })
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': playlist_title,
+            'uploader': playlist_uploader,
+            'thumbnail': playlist_thumbnail,
+            'entries': entries,
+        }
index 9f569aa932967910e12b46c0d0269557437d0c79..1e1763abf79b833ddc8e982d7a9acbbd080785a7 100644 (file)
@@ -11,8 +11,7 @@ from ..utils import compat_urllib_parse_unquote
 class DropboxIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
     _TEST = {
-        'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4',
-        'md5': '8a3d905427a6951ccb9eb292f154530b',
+        'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
         'info_dict': {
             'id': 'nelirfsxnmcfbfh',
             'ext': 'mp4',
@@ -25,7 +24,9 @@ class DropboxIE(InfoExtractor):
         video_id = mobj.group('id')
         fn = compat_urllib_parse_unquote(mobj.group('title'))
         title = os.path.splitext(fn)[0]
-        video_url = url + '?dl=1'
+        video_url = (
+            re.sub(r'[?&]dl=0', '', url) +
+            ('?' if '?' in url else '&') + 'dl=1')
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py
new file mode 100644 (file)
index 0000000..d5bfd7f
--- /dev/null
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import str_to_int
+
+
+class DrTuberIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
+    _TEST = {
+        'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
+        'md5': '93e680cf2536ad0dfb7e74d94a89facd',
+        'info_dict': {
+            'id': '1740434',
+            'display_id': 'hot-perky-blonde-naked-golf',
+            'ext': 'mp4',
+            'title': 'Hot Perky Blonde Naked Golf',
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'categories': list,  # NSFW
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_url = self._html_search_regex(
+            r'<source src="([^"]+)"', webpage, 'video URL')
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
+
+        thumbnail = self._html_search_regex(
+            r'poster="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+
+        like_count = str_to_int(self._html_search_regex(
+            r'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
+            webpage, 'like count', fatal=False))
+        dislike_count = str_to_int(self._html_search_regex(
+            r'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
+            webpage, 'like count', fatal=False))
+        comment_count = str_to_int(self._html_search_regex(
+            r'<span class="comments_count">([\d,\.]+)</span>',
+            webpage, 'comment count', fatal=False))
+
+        cats_str = self._html_search_regex(
+            r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
+        categories = None if cats_str is None else cats_str.split(' ')
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'comment_count': comment_count,
+            'categories': categories,
+            'age_limit': self._rta_search(webpage),
+        }
index cdccfd376b80ee5ebc61c25ab4cd00e12dcfc458..9d6ce1f48cd41c390da308768f131b6c44521629 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import parse_iso8601
 
 
 class DRTVIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P<id>[\da-z-]+)'
+    _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)+(?P<id>[\da-z-]+)(?:[/#?]|$)'
 
     _TEST = {
         'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
index 88f5526b8a59491cc6cd40b48fe9451b3fc2d12b..c1b4c729ef5888da0bdcebf692cbddad30dee4df 100644 (file)
@@ -1,10 +1,13 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
 import json
 import random
 import re
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
+    compat_str,
 )
 
 
@@ -12,86 +15,98 @@ class EightTracksIE(InfoExtractor):
     IE_NAME = '8tracks'
     _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
     _TEST = {
-        u"name": u"EightTracks",
-        u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
-        u"playlist": [
+        "name": "EightTracks",
+        "url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
+        "info_dict": {
+            'id': '1336550',
+            'display_id': 'youtube-dl-test-tracks-a',
+            "description": "test chars:  \"'/\\ä↭",
+            "title": "youtube-dl test tracks \"'/\\ä↭<>",
+        },
+        "playlist": [
             {
-                u"file": u"11885610.m4a",
-                u"md5": u"96ce57f24389fc8734ce47f4c1abcc55",
-                u"info_dict": {
-                    u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "96ce57f24389fc8734ce47f4c1abcc55",
+                "info_dict": {
+                    "id": "11885610",
+                    "ext": "m4a",
+                    "title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                 }
             },
             {
-                u"file": u"11885608.m4a",
-                u"md5": u"4ab26f05c1f7291ea460a3920be8021f",
-                u"info_dict": {
-                    u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "4ab26f05c1f7291ea460a3920be8021f",
+                "info_dict": {
+                    "id": "11885608",
+                    "ext": "m4a",
+                    "title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                 }
             },
             {
-                u"file": u"11885679.m4a",
-                u"md5": u"d30b5b5f74217410f4689605c35d1fd7",
-                u"info_dict": {
-                    u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "d30b5b5f74217410f4689605c35d1fd7",
+                "info_dict": {
+                    "id": "11885679",
+                    "ext": "m4a",
+                    "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                 }
             },
             {
-                u"file": u"11885680.m4a",
-                u"md5": u"4eb0a669317cd725f6bbd336a29f923a",
-                u"info_dict": {
-                    u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "4eb0a669317cd725f6bbd336a29f923a",
+                "info_dict": {
+                    "id": "11885680",
+                    "ext": "m4a",
+                    "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                 }
             },
             {
-                u"file": u"11885682.m4a",
-                u"md5": u"1893e872e263a2705558d1d319ad19e8",
-                u"info_dict": {
-                    u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "1893e872e263a2705558d1d319ad19e8",
+                "info_dict": {
+                    "id": "11885682",
+                    "ext": "m4a",
+                    "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                 }
             },
             {
-                u"file": u"11885683.m4a",
-                u"md5": u"b673c46f47a216ab1741ae8836af5899",
-                u"info_dict": {
-                    u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "b673c46f47a216ab1741ae8836af5899",
+                "info_dict": {
+                    "id": "11885683",
+                    "ext": "m4a",
+                    "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                 }
             },
             {
-                u"file": u"11885684.m4a",
-                u"md5": u"1d74534e95df54986da7f5abf7d842b7",
-                u"info_dict": {
-                    u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "1d74534e95df54986da7f5abf7d842b7",
+                "info_dict": {
+                    "id": "11885684",
+                    "ext": "m4a",
+                    "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                 }
             },
             {
-                u"file": u"11885685.m4a",
-                u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0",
-                u"info_dict": {
-                    u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "f081f47af8f6ae782ed131d38b9cd1c0",
+                "info_dict": {
+                    "id": "11885685",
+                    "ext": "m4a",
+                    "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                 }
             }
         ]
     }
 
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
         playlist_id = mobj.group('id')
 
         webpage = self._download_webpage(url, playlist_id)
 
-        json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
+        json_like = self._search_regex(
+            r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
         data = json.loads(json_like)
 
         session = str(random.randint(0, 1000000000))
@@ -99,21 +114,30 @@ class EightTracksIE(InfoExtractor):
         track_count = data['tracks_count']
         first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
         next_url = first_url
-        res = []
+        entries = []
         for i in range(track_count):
-            api_json = self._download_webpage(next_url, playlist_id,
-                note=u'Downloading song information %s/%s' % (str(i+1), track_count),
-                errnote=u'Failed to download song information')
+            api_json = self._download_webpage(
+                next_url, playlist_id,
+                note='Downloading song information %d/%d' % (i + 1, track_count),
+                errnote='Failed to download song information')
             api_data = json.loads(api_json)
-            track_data = api_data[u'set']['track']
+            track_data = api_data['set']['track']
             info = {
-                'id': track_data['id'],
+                'id': compat_str(track_data['id']),
                 'url': track_data['track_file_stream_url'],
                 'title': track_data['performer'] + u' - ' + track_data['name'],
                 'raw_title': track_data['name'],
                 'uploader_id': data['user']['login'],
                 'ext': 'm4a',
             }
-            res.append(info)
-            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
-        return res
+            entries.append(info)
+            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
+                session, mix_id, track_data['id'])
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'id': compat_str(mix_id),
+            'display_id': playlist_id,
+            'title': data.get('name'),
+            'description': data.get('description'),
+        }
index e6952588fbdfa08167935fc2b1c0381804328943..70f8efe27578c4d43b27378a4a2c80d495a7488c 100644 (file)
@@ -1,54 +1,25 @@
 from __future__ import unicode_literals
 
-import re
+from .tnaflix import TNAFlixIE
 
-from .common import InfoExtractor
 
+class EMPFlixIE(TNAFlixIE):
+    _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html'
+
+    _TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
+    _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
+    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
 
-class EmpflixIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html'
     _TEST = {
         'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
         'md5': 'b1bc15b6412d33902d6e5952035fcabc',
         'info_dict': {
             'id': '33051',
+            'display_id': 'Amateur-Finger-Fuck',
             'ext': 'mp4',
             'title': 'Amateur Finger Fuck',
             'description': 'Amateur solo finger fucking.',
+            'thumbnail': 're:https?://.*\.jpg$',
             'age_limit': 18,
         }
     }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-        age_limit = self._rta_search(webpage)
-
-        video_title = self._html_search_regex(
-            r'name="title" value="(?P<title>[^"]*)"', webpage, 'title')
-        video_description = self._html_search_regex(
-            r'name="description" value="([^"]*)"', webpage, 'description', fatal=False)
-
-        cfg_url = self._html_search_regex(
-            r'flashvars\.config = escape\("([^"]+)"',
-            webpage, 'flashvars.config')
-
-        cfg_xml = self._download_xml(
-            cfg_url, video_id, note='Downloading metadata')
-
-        formats = [
-            {
-                'url': item.find('videoLink').text,
-                'format_id': item.find('res').text,
-            } for item in cfg_xml.findall('./quality/item')
-        ]
-
-        return {
-            'id': video_id,
-            'title': video_title,
-            'description': video_description,
-            'formats': formats,
-            'age_limit': age_limit,
-        }
diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py
new file mode 100644 (file)
index 0000000..522aa3d
--- /dev/null
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    str_to_int,
+)
+
+
+class EpornerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
+    _TEST = {
+        'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
+        'md5': '3b427ae4b9d60619106de3185c2987cd',
+        'info_dict': {
+            'id': '95008',
+            'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
+            'ext': 'flv',
+            'title': 'Infamous Tiffany Teen Strip Tease Video',
+            'duration': 194,
+            'view_count': int,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+        title = self._html_search_regex(
+            r'<title>(.*?) - EPORNER', webpage, 'title')
+
+        redirect_code = self._html_search_regex(
+            r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
+            webpage, 'redirect_code')
+        redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
+        player_code = self._download_webpage(
+            redirect_url, display_id, note='Downloading player config')
+
+        sources = self._search_regex(
+            r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources')
+
+        formats = []
+        for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources):
+            fmt = {
+                'url': video_url,
+                'format_id': format_id,
+            }
+            m = re.search(r'^(\d+)', format_id)
+            if m:
+                fmt['height'] = int(m.group(1))
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        duration = parse_duration(self._search_regex(
+            r'class="mbtim">([0-9:]+)</div>', webpage, 'duration',
+            fatal=False))
+        view_count = str_to_int(self._search_regex(
+            r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
+            webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'duration': duration,
+            'view_count': view_count,
+            'formats': formats,
+            'age_limit': self._rta_search(webpage),
+        }
index 12829cbcc0631de4eea1bc4d84d4702b6da54281..d237a82813ea2556175e32a882d87bd5d1831924 100644 (file)
@@ -12,10 +12,11 @@ from ..utils import (
 class EveryonesMixtapeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
-        'file': '5bfseWNmlds.mp4',
         "info_dict": {
+            'id': '5bfseWNmlds',
+            'ext': 'mp4',
             "title": "Passion Pit - \"Sleepyhead\" (Official Music Video)",
             "uploader": "FKR.TV",
             "uploader_id": "frenchkissrecords",
@@ -25,7 +26,14 @@ class EveryonesMixtapeIE(InfoExtractor):
         'params': {
             'skip_download': True,  # This is simply YouTube
         }
-    }
+    }, {
+        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
+        'info_dict': {
+            'id': 'm7m0jJAbMQi',
+            'title': 'Driving',
+        },
+        'playlist_count': 24
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index f7cf700b5df8ecbcd714fd23db3dc7ff477adb73..60e68d98ac68ec5f0ccff4413af70a54bfd75ced 100644 (file)
@@ -12,8 +12,8 @@ from ..utils import (
     compat_urllib_parse,
     compat_urllib_request,
     urlencode_postdata,
-
     ExtractorError,
+    limit_length,
 )
 
 
@@ -21,23 +21,34 @@ class FacebookIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         https?://(?:\w+\.)?facebook\.com/
         (?:[^#]*?\#!/)?
-        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
+        (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
         (?:v|video_id)=(?P<id>[0-9]+)
         (?:.*)'''
     _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
     _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
     _NETRC_MACHINE = 'facebook'
     IE_NAME = 'facebook'
-    _TEST = {
-        'url': 'https://www.facebook.com/photo.php?v=120708114770723',
-        'md5': '48975a41ccc4b7a581abd68651c1a5a8',
+    _TESTS = [{
+        'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
+        'md5': '6a40d33c0eccbb1af76cf0485a052659',
+        'info_dict': {
+            'id': '637842556329505',
+            'ext': 'mp4',
+            'duration': 38,
+            'title': 'Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam fin...',
+        }
+    }, {
+        'note': 'Video without discernible title',
+        'url': 'https://www.facebook.com/video.php?v=274175099429670',
         'info_dict': {
-            'id': '120708114770723',
+            'id': '274175099429670',
             'ext': 'mp4',
-            'duration': 279,
-            'title': 'PEOPLE ARE AWESOME 2013',
+            'title': 'Facebook video #274175099429670',
         }
-    }
+    }, {
+        'url': 'https://www.facebook.com/video.php?v=10204634152394104',
+        'only_matching': True,
+    }]
 
     def _login(self):
         (useremail, password) = self._get_login_info()
@@ -76,7 +87,8 @@ class FacebookIE(InfoExtractor):
 
             check_form = {
                 'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
-                'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'),
+                'h': self._search_regex(
+                    r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
                 'name_action_selected': 'dont_save',
             }
             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
@@ -121,7 +133,15 @@ class FacebookIE(InfoExtractor):
             raise ExtractorError('Cannot find video URL')
 
         video_title = self._html_search_regex(
-            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')
+            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
+            fatal=False)
+        if not video_title:
+            video_title = self._html_search_regex(
+                r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
+                webpage, 'alternative title', default=None)
+            video_title = limit_length(video_title, 80)
+        if not video_title:
+            video_title = 'Facebook video #%s' % video_id
 
         return {
             'id': video_id,
index 3a908d01f23dbfac41ea124c65336cc12eb79e86..2bfa20606cd7846b0d15e8c441de3fce2a8982f6 100644 (file)
@@ -12,6 +12,7 @@ from ..utils import (
     compat_urlparse,
     compat_xml_parse_error,
 
+    determine_ext,
     ExtractorError,
     float_or_none,
     HEADRequest,
@@ -351,7 +352,36 @@ class GenericIE(InfoExtractor):
                 'description': 're:'
             },
             'playlist_mincount': 11,
-        }
+        },
+        # Multiple brightcove videos
+        # https://github.com/rg3/youtube-dl/issues/2283
+        {
+            'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
+            'info_dict': {
+                'id': 'always-never',
+                'title': 'Always / Never - The New Yorker',
+            },
+            'playlist_count': 3,
+            'params': {
+                'extract_flat': False,
+                'skip_download': True,
+            }
+        },
+        # MLB embed
+        {
+            'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
+            'md5': '96f09a37e44da40dd083e12d9a683327',
+            'info_dict': {
+                'id': '33322633',
+                'ext': 'mp4',
+                'title': 'Ump changes call to ball',
+                'description': 'md5:71c11215384298a172a6dcb4c2e20685',
+                'duration': 48,
+                'timestamp': 1401537900,
+                'upload_date': '20140531',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -598,7 +628,7 @@ class GenericIE(InfoExtractor):
                 embedSWF\(?:\s*
             )
             (["\'])
-                (?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
+                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                 (?:embed|v)/.+?)
             \1''', webpage)
         if matches:
@@ -794,6 +824,12 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'SBS')
 
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://m\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'MLB')
+
         # Start with something easy: JW Player in SWFObject
         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if not found:
@@ -830,13 +866,14 @@ class GenericIE(InfoExtractor):
             if m_video_type is not None:
                 def check_video(vurl):
                     vpath = compat_urlparse.urlparse(vurl).path
-                    return '.' in vpath and not vpath.endswith('.swf')
+                    vext = determine_ext(vpath)
+                    return '.' in vpath and vext not in ('swf', 'png', 'jpg')
                 found = list(filter(
                     check_video,
                     re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
         if not found:
             # HTML5 video
-            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)
+            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]+)? src="([^"]+)"', webpage)
         if not found:
             found = re.search(
                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
diff --git a/youtube_dl/extractor/hornbunny.py b/youtube_dl/extractor/hornbunny.py
new file mode 100644 (file)
index 0000000..7e77144
--- /dev/null
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+)
+
+
+class HornBunnyIE(InfoExtractor):
+    _VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
+    _TEST = {
+        'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
+        'md5': '95e40865aedd08eff60272b704852ad7',
+        'info_dict': {
+            'id': '5227',
+            'ext': 'flv',
+            'title': 'panty slut jerk off instruction',
+            'duration': 550,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(
+            url, video_id, note='Downloading initial webpage')
+        title = self._html_search_regex(
+            r'class="title">(.*?)</h2>', webpage, 'title')
+        redirect_url = self._html_search_regex(
+            r'pg&settings=(.*?)\|0"\);', webpage, 'title')
+        webpage2 = self._download_webpage(redirect_url, video_id)
+        video_url = self._html_search_regex(
+            r'flvMask:(.*?);', webpage2, 'video_url')
+        
+        duration = parse_duration(self._search_regex(
+            r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
+            webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._search_regex(
+            r'<strong>Views:</strong>\s*(\d+)</div>',
+            webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'ext': 'flv',
+            'duration': duration,
+            'view_count': view_count,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py
new file mode 100644 (file)
index 0000000..8e812b6
--- /dev/null
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_request,
+    int_or_none,
+    urlencode_postdata,
+)
+
+
+class HostingBulkIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?hostingbulk\.com/
+        (?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html'''
+    _FILE_DELETED_REGEX = r'<b>File Not Found</b>'
+    _TEST = {
+        'url': 'http://hostingbulk.com/n0ulw1hv20fm.html',
+        'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f',
+        'info_dict': {
+            'id': 'n0ulw1hv20fm',
+            'ext': 'mp4',
+            'title': 'md5:5afeba33f48ec87219c269e054afd622',
+            'filesize': 6816081,
+            'thumbnail': 're:^http://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        url = 'http://hostingbulk.com/{0:}.html'.format(video_id)
+
+        # Custom request with cookie to set language to English, so our file
+        # deleted regex would work.
+        request = compat_urllib_request.Request(
+            url, headers={'Cookie': 'lang=english'})
+        webpage = self._download_webpage(request, video_id)
+
+        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id,
+                                 expected=True)
+
+        title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title')
+        filesize = int_or_none(
+            self._search_regex(
+                r'<small>\((\d+)\sbytes?\)</small>',
+                webpage,
+                'filesize',
+                fatal=False
+            )
+        )
+        thumbnail = self._search_regex(
+            r'<img src="([^"]+)".+?class="pic"',
+            webpage, 'thumbnail', fatal=False)
+
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            value="([^"]*)"
+            ''', webpage))
+
+        request = compat_urllib_request.Request(url, urlencode_postdata(fields))
+        request.add_header('Content-type', 'application/x-www-form-urlencoded')
+        response = self._request_webpage(request, video_id,
+                                         'Submiting download request')
+        video_url = response.geturl()
+
+        formats = [{
+            'format_id': 'sd',
+            'filesize': filesize,
+            'url': video_url,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 1f42c6d3a957674aa7bb2ee4ee3d56dac43cd2f8..9e8b69f57c67f38d382d3528f5f1a94c38121f0f 100644 (file)
@@ -18,6 +18,7 @@ class IGNIE(InfoExtractor):
     _DESCRIPTION_RE = [
         r'<span class="page-object-description">(.+?)</span>',
         r'id="my_show_video">.*?<p>(.*?)</p>',
+        r'<meta name="description" content="(.*?)"',
     ]
 
     _TESTS = [
@@ -55,6 +56,17 @@ class IGNIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
+            'md5': '4e9a0bda1e5eebd31ddcf86ec0b9b3c7',
+            'info_dict': {
+                'id': '078fdd005f6d3c02f63d795faa1b984f',
+                'ext': 'mp4',
+                'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
+                'description': 'Giant skeletons, bloody hunts, and captivating'
+                    ' natural beauty take our breath away.',
+            },
+        },
     ]
 
     def _find_video_id(self, webpage):
@@ -62,6 +74,7 @@ class IGNIE(InfoExtractor):
             r'data-video-id="(.+?)"',
             r'<object id="vid_(.+?)"',
             r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
+            r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
         ]
         return self._search_regex(res_id, webpage, 'video id')
 
@@ -70,10 +83,7 @@ class IGNIE(InfoExtractor):
         name_or_id = mobj.group('name_or_id')
         page_type = mobj.group('type')
         webpage = self._download_webpage(url, name_or_id)
-        if page_type == 'articles':
-            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
-            return self.url_result(video_url, ie='IGN')
-        elif page_type != 'video':
+        if page_type != 'video':
             multiple_urls = re.findall(
                 '<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
                 webpage)
index 7cee505c085cd1601e0b8ce3ab689795b4f94dfd..4536db3bfca1e1244e70089bea30de9687d923f0 100644 (file)
@@ -63,6 +63,14 @@ class ImdbListIE(InfoExtractor):
     IE_NAME = 'imdb:list'
     IE_DESC = 'Internet Movie Database lists'
     _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+    _TEST = {
+        'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
+        'info_dict': {
+            'id': 'JFs9NWw6XI0',
+            'title': 'March 23, 2012 Releases',
+        },
+        'playlist_count': 7,
+    }
     
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index b5372bf7a24e48a347127a1dc76c9dc672b32b64..5109f26ce860edc0675eaba6350e0ab820e7fe27 100644 (file)
@@ -46,6 +46,30 @@ class InstagramUserIE(InfoExtractor):
     _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
     IE_DESC = 'Instagram user profile'
     IE_NAME = 'instagram:user'
+    _TEST = {
+        'url': 'http://instagram.com/porsche',
+        'info_dict': {
+            'id': 'porsche',
+            'title': 'porsche',
+        },
+        'playlist_mincount': 2,
+        'playlist': [{
+            'info_dict': {
+                'id': '614605558512799803_462752227',
+                'ext': 'mp4',
+                'title': '#Porsche Intelligent Performance.',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'uploader': 'Porsche',
+                'uploader_id': 'porsche',
+                'timestamp': 1387486713,
+                'upload_date': '20131219',
+            },
+        }],
+        'params': {
+            'extract_flat': True,
+            'skip_download': True,
+        }
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index 4027deb7071806fcba313a80cebe694e9f96580e..75b543b7cf8ed443bb98f3cd5c492e1c629c28a3 100644 (file)
@@ -127,6 +127,21 @@ class IviCompilationIE(InfoExtractor):
     IE_DESC = 'ivi.ru compilations'
     IE_NAME = 'ivi:compilation'
     _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+    _TESTS = [{
+        'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
+        'info_dict': {
+            'id': 'dvoe_iz_lartsa',
+            'title': 'Двое из ларца (2006 - 2008)',
+        },
+        'playlist_mincount': 24,
+    }, {
+        'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
+        'info_dict': {
+            'id': 'dvoe_iz_lartsa/season1',
+            'title': 'Двое из ларца (2006 - 2008) 1 сезон',
+        },
+        'playlist_mincount': 12,
+    }]
 
     def _extract_entries(self, html, compilation_id):
         return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
index 79e8430b5e6a85d59ede8490eac5405950c5dabc..a83dd249f6cd5694884158de6471802df6fe2d01 100644 (file)
@@ -9,29 +9,50 @@ from ..utils import (
     parse_iso8601,
     determine_ext,
     int_or_none,
+    float_or_none,
     str_to_int,
 )
 
 
 class IzleseneIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
-    _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
-    _TEST = {
-        'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
-        'md5': '4384f9f0ea65086734b881085ee05ac2',
-        'info_dict': {
-            'id': '7599694',
-            'ext': 'mp4',
-            'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
-            'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
-            'thumbnail': 're:^http://.*\.jpg',
-            'uploader_id': 'pelikzzle',
-            'timestamp': 1404298698,
-            'upload_date': '20140702',
-            'duration': 95.395,
-            'age_limit': 0,
-        }
-    }
+    _VALID_URL = r'''(?x)
+        https?://(?:(?:www|m)\.)?izlesene\.com/
+        (?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)
+        '''
+    _TESTS = [
+        {
+            'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
+            'md5': '4384f9f0ea65086734b881085ee05ac2',
+            'info_dict': {
+                'id': '7599694',
+                'ext': 'mp4',
+                'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
+                'description': 'md5:253753e2655dde93f59f74b572454f6d',
+                'thumbnail': 're:^http://.*\.jpg',
+                'uploader_id': 'pelikzzle',
+                'timestamp': 1404298698,
+                'upload_date': '20140702',
+                'duration': 95.395,
+                'age_limit': 0,
+            }
+        },
+        {
+            'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997',
+            'md5': '97f09b6872bffa284cb7fa4f6910cb72',
+            'info_dict': {
+                'id': '17997',
+                'ext': 'mp4',
+                'title': 'Tarkan Dortmund 2006 Konseri',
+                'description': 'Tarkan Dortmund 2006 Konseri',
+                'thumbnail': 're:^http://.*\.jpg',
+                'uploader_id': 'parlayankiz',
+                'timestamp': 1163318593,
+                'upload_date': '20061112',
+                'duration': 253.666,
+                'age_limit': 0,
+            }
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -45,18 +66,19 @@ class IzleseneIE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(webpage)
 
         uploader = self._html_search_regex(
-            r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
+            r"adduserUsername\s*=\s*'([^']+)';",
+            webpage, 'uploader', fatal=False, default='')
         timestamp = parse_iso8601(self._html_search_meta(
             'uploadDate', webpage, 'upload date', fatal=False))
 
-        duration = int_or_none(self._html_search_regex(
-            r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
-        if duration:
-            duration /= 1000.0
+        duration = float_or_none(self._html_search_regex(
+            r'"videoduration"\s*:\s*"([^"]+)"',
+            webpage, 'duration', fatal=False), scale=1000)
 
         view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
         comment_count = self._html_search_regex(
-            r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'uploader', fatal=False)
+            r'comment_count\s*=\s*\'([^\']+)\';',
+            webpage, 'comment_count', fatal=False)
 
         family_friendly = self._html_search_meta(
             'isFamilyFriendly', webpage, 'age limit', fatal=False)
@@ -66,20 +88,26 @@ class IzleseneIE(InfoExtractor):
         ext = determine_ext(content_url, 'mp4')
 
         # Might be empty for some videos.
-        qualities = self._html_search_regex(
-            r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')
+        streams = self._html_search_regex(
+            r'"qualitylevel"\s*:\s*"([^"]+)"',
+            webpage, 'streams', fatal=False, default='')
 
         formats = []
-        for quality in qualities.split('|'):
-            json = self._download_json(
-                self._STREAM_URL.format(id=video_id, format=quality), video_id,
-                note='Getting video URL for "%s" quality' % quality,
-                errnote='Failed to get video URL for "%s" quality' % quality
-            )
+        if streams:
+            for stream in streams.split('|'):
+                quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
+                formats.append({
+                    'format_id': '%sp' % quality if quality else 'sd',
+                    'url': url,
+                    'ext': ext,
+                })
+        else:
+            stream_url = self._search_regex(
+                r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL')
             formats.append({
-                'url': json.get('streamurl'),
+                'format_id': 'sd',
+                'url': stream_url,
                 'ext': ext,
-                'format_id': '%sp' % quality if quality else 'sd',
             })
 
         return {
index 772bb5671e8f027b2723b54c00794217f6d94edb..408d00944ceb83e1551c12b5707031c961bb4f5d 100644 (file)
@@ -9,21 +9,30 @@ from ..utils import (
 
 
 class KhanAcademyIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
+    _VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
     IE_NAME = 'KhanAcademy'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.khanacademy.org/video/one-time-pad',
-        'file': 'one-time-pad.mp4',
         'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
         'info_dict': {
+            'id': 'one-time-pad',
+            'ext': 'mp4',
             'title': 'The one-time pad',
             'description': 'The perfect cipher',
             'duration': 176,
             'uploader': 'Brit Cruise',
             'upload_date': '20120411',
         }
-    }
+    }, {
+        'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
+        'info_dict': {
+            'id': 'cryptography',
+            'title': 'Journey into cryptography',
+            'description': 'How have humans protected their secret messages through history? What has changed today?',
+        },
+        'playlist_mincount': 3,
+    }]
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
index 281a0ce4052eb986d7d4df6d10b8c29b36cab6d5..5161474171b2a6a53389477275f54480a02d1240 100644 (file)
@@ -19,7 +19,7 @@ from ..utils import (
 class LivestreamIE(InfoExtractor):
     IE_NAME = 'livestream'
     _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
         'md5': '53274c76ba7754fb0e8d072716f2292b',
         'info_dict': {
@@ -31,7 +31,13 @@ class LivestreamIE(InfoExtractor):
             'view_count': int,
             'thumbnail': 're:^http://.*\.jpg$'
         }
-    }
+    }, {
+        'url': 'http://new.livestream.com/tedx/cityenglish',
+        'info_dict': {
+            'title': 'TEDCity2.0 (English)',
+        },
+        'playlist_mincount': 4,
+    }]
 
     def _parse_smil(self, video_id, smil_url):
         formats = []
@@ -111,34 +117,37 @@ class LivestreamIE(InfoExtractor):
         event_name = mobj.group('event_name')
         webpage = self._download_webpage(url, video_id or event_name)
 
-        og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
-        if og_video is None:
-            config_json = self._search_regex(
-                r'window.config = ({.*?});', webpage, 'window config')
-            info = json.loads(config_json)['event']
-
-            def is_relevant(vdata, vid):
-                result = vdata['type'] == 'video'
-                if video_id is not None:
-                    result = result and compat_str(vdata['data']['id']) == vid
-                return result
-
-            videos = [self._extract_video_info(video_data['data'])
-                      for video_data in info['feed']['data']
-                      if is_relevant(video_data, video_id)]
-            if video_id is None:
-                # This is an event page:
-                return self.playlist_result(videos, info['id'], info['full_name'])
-            else:
-                if videos:
-                    return videos[0]
-        else:
+        og_video = self._og_search_video_url(
+            webpage, 'player url', fatal=False, default=None)
+        if og_video is not None:
             query_str = compat_urllib_parse_urlparse(og_video).query
             query = compat_urlparse.parse_qs(query_str)
-            api_url = query['play_url'][0].replace('.smil', '')
-            info = json.loads(self._download_webpage(
-                api_url, video_id, 'Downloading video info'))
-            return self._extract_video_info(info)
+            if 'play_url' in query:
+                api_url = query['play_url'][0].replace('.smil', '')
+                info = json.loads(self._download_webpage(
+                    api_url, video_id, 'Downloading video info'))
+                return self._extract_video_info(info)
+
+        config_json = self._search_regex(
+            r'window.config = ({.*?});', webpage, 'window config')
+        info = json.loads(config_json)['event']
+
+        def is_relevant(vdata, vid):
+            result = vdata['type'] == 'video'
+            if video_id is not None:
+                result = result and compat_str(vdata['data']['id']) == vid
+            return result
+
+        videos = [self._extract_video_info(video_data['data'])
+                  for video_data in info['feed']['data']
+                  if is_relevant(video_data, video_id)]
+        if video_id is None:
+            # This is an event page:
+            return self.playlist_result(videos, info['id'], info['full_name'])
+        else:
+            if not videos:
+                raise ExtractorError('Cannot find video %s' % video_id)
+            return videos[0]
 
 
 # The original version of Livestream uses a different system
@@ -148,7 +157,7 @@ class LivestreamOriginalIE(InfoExtractor):
         (?P<user>[^/]+)/(?P<type>video|folder)
         (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
         '''
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
         'info_dict': {
             'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
@@ -159,7 +168,13 @@ class LivestreamOriginalIE(InfoExtractor):
             # rtmp
             'skip_download': True,
         },
-    }
+    }, {
+        'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+        'info_dict': {
+            'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+        },
+        'playlist_mincount': 4,
+    }]
 
     def _extract_video(self, user, video_id):
         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
@@ -182,15 +197,19 @@ class LivestreamOriginalIE(InfoExtractor):
 
     def _extract_folder(self, url, folder_id):
         webpage = self._download_webpage(url, folder_id)
-        urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
+        paths = orderedSet(re.findall(
+            r'''(?x)(?:
+                <li\s+class="folder">\s*<a\s+href="|
+                <a\s+href="(?=https?://livestre\.am/)
+            )([^"]+)"''', webpage))
 
         return {
             '_type': 'playlist',
             'id': folder_id,
             'entries': [{
                 '_type': 'url',
-                'url': video_url,
-            } for video_url in urls],
+                'url': compat_urlparse.urljoin(url, p),
+            } for p in paths],
         }
 
     def _real_extract(self, url):
index 5f64e7bd0d98b74aea2a4350a51f057b4d0280ba..520f27fca14a3ed819b452281ede6f4aa86fe4a5 100644 (file)
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse,
     ExtractorError,
+    HEADRequest,
     int_or_none,
     parse_iso8601,
 )
@@ -38,7 +39,7 @@ class MixcloudIE(InfoExtractor):
             try:
                 # We only want to know if the request succeed
                 # don't download the whole file
-                self._request_webpage(url, None, False)
+                self._request_webpage(HEADRequest(url), None, False)
                 return url
             except ExtractorError:
                 url = None
index 37c72bc5357e3766819b0f86d5bc379fdf7406c4..bfdb462ebaf663df7d7c4f3c1618988f0aa1d1e3 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class MLBIE(InfoExtractor):
-    _VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
+    _VALID_URL = r'https?://m\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|shared/video/embed/embed\.html\?.*?\bcontent_id=)(?P<id>n?\d+)'
     _TESTS = [
         {
             'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
@@ -69,6 +69,10 @@ class MLBIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
         },
+        {
+            'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py
new file mode 100644 (file)
index 0000000..2ff79b9
--- /dev/null
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_parse,
+    compat_urllib_request,
+    int_or_none,
+)
+
+
+class MoeVideoIE(InfoExtractor):
+    IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
+    _VALID_URL = r'''(?x)
+        https?://(?P<host>(?:www\.)?
+        (?:(?:moevideo|playreplay|videochart)\.net))/
+        (?:video|framevideo)/(?P<id>[0-9]+\.[0-9A-Za-z]+)'''
+    _API_URL = 'http://api.letitbit.net/'
+    _API_KEY = 'tVL0gjqo5'
+    _TESTS = [
+        {
+            'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
+            'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
+            'info_dict': {
+                'id': '00297.0036103fe3d513ef27915216fd29',
+                'ext': 'flv',
+                'title': 'Sink cut out machine',
+                'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'width': 540,
+                'height': 360,
+                'duration': 179,
+                'filesize': 17822500,
+            }
+        },
+        {
+            'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
+            'md5': '74f0a014d5b661f0f0e2361300d1620e',
+            'info_dict': {
+                'id': '77107.7f325710a627383d40540d8e991a',
+                'ext': 'flv',
+                'title': 'Operacion Condor.',
+                'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'width': 480,
+                'height': 296,
+                'duration': 6027,
+                'filesize': 588257923,
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(
+            'http://%s/video/%s' % (mobj.group('host'), video_id),
+            video_id, 'Downloading webpage')
+
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage)
+
+        r = [
+            self._API_KEY,
+            [
+                'preview/flv_link',
+                {
+                    'uid': video_id,
+                },
+            ],
+        ]
+        r_json = json.dumps(r)
+        post = compat_urllib_parse.urlencode({'r': r_json})
+        req = compat_urllib_request.Request(self._API_URL, post)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+
+        response = self._download_json(req, video_id)
+        if response['status'] != 'OK':
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, response['data']),
+                expected=True
+            )
+        item = response['data'][0]
+        video_url = item['link']
+        duration = int_or_none(item['length'])
+        width = int_or_none(item['width'])
+        height = int_or_none(item['height'])
+        filesize = int_or_none(item['convert_size'])
+
+        formats = [{
+            'format_id': 'sd',
+            'http_headers': {'Range': 'bytes=0-'},  # Required to download
+            'url': video_url,
+            'width': width,
+            'height': height,
+            'filesize': filesize,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': description,
+            'duration': duration,
+            'formats': formats,
+        }
index b9430b09b749ad8c1dba28e3db794ef435b5aac8..d658647e6ca6d9b7675dd76ea55c58f52887374d 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import os
 import re
 
@@ -8,15 +10,17 @@ from ..utils import (
     compat_urllib_parse,
 )
 
+
 class MofosexIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
+    _VALID_URL = r'^https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
     _TEST = {
-        u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
-        u'file': u'5018.mp4',
-        u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
-        u'info_dict': {
-            u"title": u"Japanese Teen Music Video",
-            u"age_limit": 18,
+        'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
+        'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
+        'info_dict': {
+            'id': '5018',
+            'ext': 'mp4',
+            'title': 'Japanese Teen Music Video',
+            'age_limit': 18,
         }
     }
 
@@ -29,8 +33,8 @@ class MofosexIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
 
-        video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
+        video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
         path = compat_urllib_parse_urlparse(video_url).path
         extension = os.path.splitext(path)[1][1:]
         format = path.split('/')[5].split('_')[:2]
diff --git a/youtube_dl/extractor/musicvault.py b/youtube_dl/extractor/musicvault.py
new file mode 100644 (file)
index 0000000..ebb1eb8
--- /dev/null
@@ -0,0 +1,76 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    unified_strdate,
+)
+
+
+class MusicVaultIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
+    _TEST = {
+        'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
+        'md5': '2cdbb3ae75f7fb3519821507d2fb3c15',
+        'info_dict': {
+            'id': '1010863',
+            'ext': 'mp4',
+            'uploader_id': 'the-allman-brothers-band',
+            'title': 'Straight from the Heart',
+            'duration': 244,
+            'uploader': 'The Allman Brothers Band',
+            'thumbnail': 're:^https?://.*/thumbnail/.*',
+            'upload_date': '19811216',
+            'location': 'Capitol Theatre (Passaic, NJ)',
+            'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+        webpage = self._download_webpage(url, display_id)
+
+        thumbnail = self._search_regex(
+            r'<meta itemprop="thumbnail" content="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+
+        data_div = self._search_regex(
+            r'(?s)<div class="data">(.*?)</div>', webpage, 'data fields')
+        uploader = self._html_search_regex(
+            r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
+        title = self._html_search_regex(
+            r'<h2.*?>(.*?)</h2>', data_div, 'title')
+        upload_date = unified_strdate(self._html_search_regex(
+            r'<h3.*?>(.*?)</h3>', data_div, 'uploader', fatal=False))
+        location = self._html_search_regex(
+            r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
+
+        duration = parse_duration(self._html_search_meta('duration', webpage))
+
+        VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'
+        kaltura_id = self._search_regex(
+            r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
+            webpage, 'kaltura ID')
+        video_url = VIDEO_URL_TEMPLATE % {
+            'entry_id': kaltura_id,
+            'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'),
+            'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'),
+        }
+
+        return {
+            'id': mobj.group('id'),
+            'url': video_url,
+            'ext': 'mp4',
+            'display_id': display_id,
+            'uploader_id': mobj.group('uploader_id'),
+            'thumbnail': thumbnail,
+            'description': self._html_search_meta('description', webpage),
+            'upload_date': upload_date,
+            'location': location,
+            'title': title,
+            'uploader': uploader,
+            'duration': duration,
+        }
index 633b42f728489c6e9f9c61a98b8b0b4d38e57be1..78e650b2d01a87d3772a1f40459171bd7cce5cf4 100644 (file)
@@ -3,18 +3,23 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import (
+    remove_end,
+    parse_duration,
+)
 
 
 class NBAIE(InfoExtractor):
     _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
     _TEST = {
         'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
-        'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
+        'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
         'info_dict': {
             'id': '0021200253-okc-bkn-recap.nba',
             'ext': 'mp4',
-            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
             'title': 'Thunder vs. Nets',
+            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
+            'duration': 181,
         },
     }
 
@@ -27,13 +32,18 @@ class NBAIE(InfoExtractor):
         video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
 
         shortened_video_id = video_id.rpartition('/')[2]
-        title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
+        title = remove_end(
+            self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
+
+        description = self._og_search_description(webpage)
+        duration = parse_duration(
+            self._html_search_meta('duration', webpage, 'duration', fatal=False))
 
-        description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
 
         return {
             'id': shortened_video_id,
             'url': video_url,
             'title': title,
             'description': description,
+            'duration': duration,
         }
index 2edd806a3f6aa12792f3c8d8065a57fd2e2e70a1..ceda1dcc0f6a0ef70d3590990910a0a36556e70a 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json
 
@@ -25,8 +27,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
             'path': initial_video_url.replace('.mp4', '_sd.mp4'),
         })
         path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
-        path_doc = self._download_xml(path_url, video_id,
-            u'Downloading final video url')
+        path_doc = self._download_xml(
+            path_url, video_id, 'Downloading final video url')
         video_url = path_doc.find('path').text
 
         join = compat_urlparse.urljoin
@@ -43,17 +45,18 @@ class NHLBaseInfoExtractor(InfoExtractor):
 
 
 class NHLIE(NHLBaseInfoExtractor):
-    IE_NAME = u'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
+    IE_NAME = 'nhl.com'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'
 
     _TEST = {
-        u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
-        u'file': u'453614.mp4',
-        u'info_dict': {
-            u'title': u'Quick clip: Weise 4-3 goal vs Flames',
-            u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.',
-            u'duration': 18,
-            u'upload_date': u'20131006',
+        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
+        'info_dict': {
+            'id': '453614',
+            'ext': 'mp4',
+            'title': 'Quick clip: Weise 4-3 goal vs Flames',
+            'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
+            'duration': 18,
+            'upload_date': '20131006',
         },
     }
 
@@ -61,23 +64,23 @@ class NHLIE(NHLBaseInfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
-        info_json = self._download_webpage(json_url, video_id,
-            u'Downloading info json')
-        info_json = self._fix_json(info_json)
-        info = json.loads(info_json)[0]
-        return self._extract_video(info)
+        data = self._download_json(
+            json_url, video_id, transform_source=self._fix_json)
+        return self._extract_video(data[0])
 
 
 class NHLVideocenterIE(NHLBaseInfoExtractor):
-    IE_NAME = u'nhl.com:videocenter'
-    IE_DESC = u'NHL videocenter category'
-    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
-
-    @classmethod
-    def suitable(cls, url):
-        if NHLIE.suitable(url):
-            return False
-        return super(NHLVideocenterIE, cls).suitable(url)
+    IE_NAME = 'nhl.com:videocenter'
+    IE_DESC = 'NHL videocenter category'
+    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
+    _TEST = {
+        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
+        'info_dict': {
+            'id': '999',
+            'title': 'Highlights',
+        },
+        'playlist_count': 12,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -86,10 +89,10 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
         cat_id = self._search_regex(
             [r'var defaultCatId = "(.+?)";',
              r'{statusIndex:0,index:0,.*?id:(.*?),'],
-            webpage, u'category id')
+            webpage, 'category id')
         playlist_title = self._html_search_regex(
             r'tab0"[^>]*?>(.*?)</td>',
-            webpage, u'playlist title', flags=re.DOTALL).lower().capitalize()
+            webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
 
         data = compat_urllib_parse.urlencode({
             'cid': cat_id,
@@ -104,7 +107,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
         response = self._fix_json(response)
         if not response.strip():
             self._downloader.report_warning(u'Got an empty reponse, trying '
-                                            u'adding the "newvideos" parameter')
+                                            'adding the "newvideos" parameter')
             response = self._download_webpage(request_url + '&newvideos=true',
                 playlist_title)
             response = self._fix_json(response)
@@ -114,5 +117,5 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
             '_type': 'playlist',
             'title': playlist_title,
             'id': cat_id,
-            'entries': [self._extract_video(i) for i in videos],
+            'entries': [self._extract_video(v) for v in videos],
         }
index da203538dbea3781d0daf05edbbaacbd72be622f..959fdf59027018e0b78030e4670cca8ca1a08855 100644 (file)
@@ -5,7 +5,10 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    compat_urllib_request,
+    compat_urllib_parse,
     ExtractorError,
+    clean_html,
     unified_strdate,
     compat_str,
 )
@@ -13,6 +16,8 @@ from ..utils import (
 
 class NocoIE(InfoExtractor):
     _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
+    _LOGIN_URL = 'http://noco.tv/do.php'
+    _NETRC_MACHINE = 'noco'
 
     _TEST = {
         'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
@@ -30,6 +35,28 @@ class NocoIE(InfoExtractor):
         'skip': 'Requires noco account',
     }
 
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_form = {
+            'a': 'login',
+            'cookie': '1',
+            'username': username,
+            'password': password,
+        }
+        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
+
+        login = self._download_json(request, None, 'Logging in as %s' % username)
+
+        if 'erreur' in login:
+            raise  ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
diff --git a/youtube_dl/extractor/nosvideo.py b/youtube_dl/extractor/nosvideo.py
new file mode 100644 (file)
index 0000000..f3be8f5
--- /dev/null
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_request,
+    urlencode_postdata,
+    xpath_text,
+    xpath_with_ns,
+)
+
+_x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
+
+
+class NosVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \
+                 '(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?'
+    _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml'
+    _FILE_DELETED_REGEX = r'<b>File Not Found</b>'
+    _TEST = {
+        'url': 'http://nosvideo.com/?v=mu8fle7g7rpq',
+        'md5': '6124ed47130d8be3eacae635b071e6b6',
+        'info_dict': {
+            'id': 'mu8fle7g7rpq',
+            'ext': 'mp4',
+            'title': 'big_buck_bunny_480p_surround-fix.avi.mp4',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        fields = {
+            'id': video_id,
+            'op': 'download1',
+            'method_free': 'Continue to Video',
+        }
+        req = compat_urllib_request.Request(url, urlencode_postdata(fields))
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+        webpage = self._download_webpage(req, video_id,
+                                         'Downloading download page')
+        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id,
+                                 expected=True)
+
+        xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID')
+        playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id)
+        playlist = self._download_xml(playlist_url, video_id)
+
+        track = playlist.find(_x('.//xspf:track'))
+        if track is None:
+            raise ExtractorError(
+                'XML playlist is missing the \'track\' element',
+                expected=True)
+        title = xpath_text(track, _x('./xspf:title'), 'title')
+        url = xpath_text(track, _x('./xspf:file'), 'URL', fatal=True)
+        thumbnail = xpath_text(track, _x('./xspf:image'), 'thumbnail')
+        if title is not None:
+            title = title.strip()
+
+        formats = [{
+            'format_id': 'sd',
+            'url': url,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 12e85a716fec900cf01d72157ab4159bc69ae8f8..902d6294498dec327699f51a1bc309f4557b3f87 100644 (file)
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     unified_strdate,
+    qualities,
 )
 
 
@@ -17,7 +18,7 @@ class NPOIE(InfoExtractor):
         'md5': '4b3f9c429157ec4775f2c9cb7b911016',
         'info_dict': {
             'id': 'VPWON_1220719',
-            'ext': 'mp4',
+            'ext': 'm4v',
             'title': 'Nieuwsuur',
             'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
             'upload_date': '20140622',
@@ -39,24 +40,32 @@ class NPOIE(InfoExtractor):
             video_id,
             note='Downloading token'
         )
-        token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token')
-        streams_info = self._download_json(
-            'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token),
-            video_id
-        )
+        token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
 
-        stream_info = self._download_json(
-            streams_info['streams'][0] + '&type=json',
-            video_id,
-            'Downloading stream info'
-        )
+        formats = []
+        quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std'])
+        for format_id in metadata['pubopties']:
+            streams_info = self._download_json(
+                'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token),
+                video_id, 'Downloading %s streams info' % format_id)
+            stream_info = self._download_json(
+                streams_info['streams'][0] + '&type=json',
+                video_id, 'Downloading %s stream info' % format_id)
+            if format_id == 'adaptive':
+                formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id))
+            else:
+                formats.append({
+                    'url': stream_info['url'],
+                    'format_id': format_id,
+                    'quality': quality(format_id),
+                })
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': metadata['titel'],
-            'ext': 'mp4',
-            'url': stream_info['url'],
             'description': metadata['info'],
             'thumbnail': metadata['images'][-1]['url'],
             'upload_date': unified_strdate(metadata['gidsdatum']),
+            'formats': formats,
         }
index 718fe9aba5fc710ee5efc47c2bbae2b02fc3c117..48ce6e7303e37463f991bb7e74241987b833297a 100644 (file)
@@ -27,47 +27,40 @@ class PornHdIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        title = self._og_search_title(webpage)
-        TITLE_SUFFIX = ' porn HD Video | PornHD.com '
-        if title.endswith(TITLE_SUFFIX):
-            title = title[:-len(TITLE_SUFFIX)]
-
+        title = self._html_search_regex(
+            r'<title>(.+) porn HD.+?</title>', webpage, 'title')
         description = self._html_search_regex(
             r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
         view_count = int_or_none(self._html_search_regex(
-            r'(\d+) views      </span>', webpage, 'view count', fatal=False))
+            r'(\d+) views\s*</span>', webpage, 'view count', fatal=False))
 
-        formats = [
-            {
-                'url': format_url,
-                'ext': format.lower(),
-                'format_id': '%s-%s' % (format.lower(), quality.lower()),
-                'quality': 1 if quality.lower() == 'high' else 0,
-            } for format, quality, format_url in re.findall(
-                r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
-        ]
+        videos = re.findall(
+            r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
 
         mobj = re.search(r'flashVars = (?P<flashvars>{.+?});', webpage)
         if mobj:
             flashvars = json.loads(mobj.group('flashvars'))
-            formats.extend([
-                {
-                    'url': flashvars['hashlink'].replace('?noProxy=1', ''),
-                    'ext': 'flv',
-                    'format_id': 'flv-low',
-                    'quality': 0,
-                },
-                {
-                    'url': flashvars['hd'].replace('?noProxy=1', ''),
-                    'ext': 'flv',
-                    'format_id': 'flv-high',
-                    'quality': 1,
-                }
-            ])
+            for key, quality in [('hashlink', 'low'), ('hd', 'high')]:
+                redirect_url = flashvars.get(key)
+                if redirect_url:
+                    videos.append(('flv', quality, redirect_url))
             thumbnail = flashvars['urlWallpaper']
         else:
             thumbnail = self._og_search_thumbnail(webpage)
 
+        formats = []
+        for format_, quality, redirect_url in videos:
+            format_id = '%s-%s' % (format_.lower(), quality.lower())
+            video_url = self._download_webpage(
+                redirect_url, video_id, 'Downloading %s video link' % format_id, fatal=False)
+            if not video_url:
+                continue
+            formats.append({
+                'url': video_url,
+                'ext': format_.lower(),
+                'format_id': format_id,
+                'quality': 1 if quality.lower() == 'high' else 0,
+            })
         self._sort_formats(formats)
 
         return {
diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py
new file mode 100644 (file)
index 0000000..202f586
--- /dev/null
@@ -0,0 +1,65 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    str_to_int,
+)
+
+
+class PornoXOIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
+    _TEST = {
+        'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
+        'md5': '582f28ecbaa9e6e24cb90f50f524ce87',
+        'info_dict': {
+            'id': '7564',
+            'ext': 'flv',
+            'title': 'Striptease From Sexy Secretary!',
+            'description': 'Striptease From Sexy Secretary!',
+            'categories': list,  # NSFW
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'\'file\'\s*:\s*"([^"]+)"', webpage, 'video_url')
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title')
+
+        description = self._html_search_regex(
+            r'<meta name="description" content="([^"]+)\s*featuring',
+            webpage, 'description', fatal=False)
+
+        thumbnail = self._html_search_regex(
+            r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
+
+        view_count = str_to_int(self._html_search_regex(
+            r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False))
+
+        categories_str = self._html_search_regex(
+            r'<meta name="description" content=".*featuring\s*([^"]+)"',
+            webpage, 'categories', fatal=False)
+        categories = (
+            None if categories_str is None
+            else categories_str.split(','))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'view_count': view_count,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py
new file mode 100644 (file)
index 0000000..463e855
--- /dev/null
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+
+
+class PromptFileIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
+    _FILE_NOT_FOUND_REGEX = r'<div.+id="not_found_msg".+>.+</div>[^-]'
+    _TEST = {
+        'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF',
+        'md5': 'd1451b6302da7215485837aaea882c4c',
+        'info_dict': {
+            'id': 'D21B4746E9-F01462F0FF',
+            'ext': 'mp4',
+            'title': 'Birds.mp4',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id,
+                                 expected=True)
+
+        fields = dict(re.findall(r'''(?x)type="hidden"\s+
+            name="(.+?)"\s+
+            value="(.*?)"
+            ''', webpage))
+        post = compat_urllib_parse.urlencode(fields)
+        req = compat_urllib_request.Request(url, post)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+        webpage = self._download_webpage(
+            req, video_id, 'Downloading video page')
+
+        url = self._html_search_regex(r'url:\s*\'([^\']+)\'', webpage, 'URL')
+        title = self._html_search_regex(
+            r'<span.+title="([^"]+)">', webpage, 'title')
+        thumbnail = self._html_search_regex(
+            r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
+            webpage, 'thumbnail', fatal=False, flags=re.DOTALL)
+
+        formats = [{
+            'format_id': 'sd',
+            'url': url,
+            'ext': determine_ext(title),
+        }]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index da64a1a7b4c0d8bceb89415894c84d651c7ac566..5b2a723c1d8dce6f05fcdd3647c93b48ad41dc5a 100644 (file)
@@ -145,7 +145,6 @@ class ProSiebenSat1IE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Kurztrips zum Valentinstag',
                 'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528',
-                'upload_date': '20130206',
                 'duration': 307.24,
             },
             'params': {
@@ -240,7 +239,7 @@ class ProSiebenSat1IE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(page)
 
         upload_date = unified_strdate(self._html_search_regex(
-            self._UPLOAD_DATE_REGEXES, page, 'upload date', fatal=False))
+            self._UPLOAD_DATE_REGEXES, page, 'upload date', default=None))
 
         formats = []
 
@@ -249,7 +248,7 @@ class ProSiebenSat1IE(InfoExtractor):
             urls_sources = urls_sources.values()
 
         def fix_bitrate(bitrate):
-            return bitrate / 1000 if bitrate % 1000 == 0 else bitrate
+            return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
 
         for source in urls_sources:
             protocol = source['protocol']
index 2d9511d5ea21a78605503abb2d01dc2df913f8d7..0ab1eb69c8c82ae0bc04ec135971cd919b17a155 100644 (file)
@@ -12,22 +12,16 @@ class RtlXlIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
+        'md5': 'cc16baa36a6c169391f0764fa6b16654',
         'info_dict': {
             'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'RTL Nieuws - Laat',
-            'description': 'Dagelijks het laatste nieuws uit binnen- en '
-                'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
-                'onze mobiele apps.',
+            'description': 'md5:6b61f66510c8889923b11f2778c72dc5',
             'timestamp': 1408051800,
             'upload_date': '20140814',
             'duration': 576.880,
         },
-        'params': {
-            # We download the first bytes of the first fragment, it can't be
-            # processed by the f4m downloader beacuse it isn't complete
-            'skip_download': True,
-        },
     }
 
     def _real_extract(self, url):
@@ -41,14 +35,32 @@ class RtlXlIE(InfoExtractor):
         material = info['material'][0]
         episode_info = info['episodes'][0]
 
-        f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
         progname = info['abstracts'][0]['name']
         subtitle = material['title'] or info['episodes'][0]['name']
 
+        videopath = material['videopath']
+        f4m_url = 'http://manifest.us.rtl.nl' + videopath
+
+        formats = self._extract_f4m_formats(f4m_url, uuid)
+
+        video_urlpart = videopath.split('/flash/')[1][:-4]
+        PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
+
+        formats.extend([
+            {
+                'url': PG_URL_TEMPLATE % ('a2m', video_urlpart),
+                'format_id': 'pg-sd',
+            },
+            {
+                'url': PG_URL_TEMPLATE % ('a3m', video_urlpart),
+                'format_id': 'pg-hd',
+            }
+        ])
+
         return {
             'id': uuid,
             'title': '%s - %s' % (progname, subtitle),
-            'formats': self._extract_f4m_formats(f4m_url, uuid),
+            'formats': formats,
             'timestamp': material['original_date'],
             'description': episode_info['synopsis'],
             'duration': parse_duration(material.get('duration')),
index 357edbbdaf88c6c29395aa7878c18f305c79b216..0c8790da28c4b06cfbc941bdff7ad4e64b47ac74 100644 (file)
@@ -74,6 +74,13 @@ class RutubeChannelIE(InfoExtractor):
     IE_NAME = 'rutube:channel'
     IE_DESC = 'Rutube channels'
     _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://rutube.ru/tags/video/1800/',
+        'info_dict': {
+            'id': '1800',
+        },
+        'playlist_mincount': 68,
+    }]
 
     _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
 
@@ -101,6 +108,7 @@ class RutubeMovieIE(RutubeChannelIE):
     IE_NAME = 'rutube:movie'
     IE_DESC = 'Rutube movies'
     _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+    _TESTS = []
 
     _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
     _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
@@ -119,5 +127,12 @@ class RutubePersonIE(RutubeChannelIE):
     IE_NAME = 'rutube:person'
     IE_DESC = 'Rutube person videos'
     _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://rutube.ru/video/person/313878/',
+        'info_dict': {
+            'id': '313878',
+        },
+        'playlist_mincount': 37,
+    }]
 
     _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
index 6c5f5a6804722bc43451f595dcdff56c3a7377b6..f737b4e5fad8cd86e28b094abcb4226927083d2a 100644 (file)
@@ -100,7 +100,7 @@ class RUTVIE(InfoExtractor):
             return mobj.group('url')
 
         mobj = re.search(
-            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
+            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
             webpage)
         if mobj:
             return mobj.group('url')
diff --git a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py
new file mode 100644 (file)
index 0000000..7531e83
--- /dev/null
@@ -0,0 +1,91 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
+    parse_duration,
+)
+
+
+class ShareSixIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)'
+    _TESTS = [
+        {
+            'url': 'http://sharesix.com/f/OXjQ7Y6',
+            'md5': '9e8e95d8823942815a7d7c773110cc93',
+            'info_dict': {
+                'id': 'OXjQ7Y6',
+                'ext': 'mp4',
+                'title': 'big_buck_bunny_480p_surround-fix.avi',
+                'duration': 596,
+                'width': 854,
+                'height': 480,
+            },
+        },
+        {
+            'url': 'http://sharesix.com/lfrwoxp35zdd',
+            'md5': 'dd19f1435b7cec2d7912c64beeee8185',
+            'info_dict': {
+                'id': 'lfrwoxp35zdd',
+                'ext': 'flv',
+                'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv',
+                'duration': 65,
+                'width': 1280,
+                'height': 720,
+            },
+        }
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        fields = {
+            'method_free': 'Free'
+        }
+        post = compat_urllib_parse.urlencode(fields)
+        req = compat_urllib_request.Request(url, post)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+
+        webpage = self._download_webpage(req, video_id,
+                                         'Downloading video page')
+
+        video_url = self._search_regex(
+            r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL')
+        title = self._html_search_regex(
+            r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title')
+        duration = parse_duration(
+            self._search_regex(
+                r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>',
+                webpage,
+                'duration',
+                fatal=False
+            )
+        )
+
+        m = re.search(
+            r'''(?xs)<dt>Width\sx\sHeight</dt>.+?
+                     <dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''',
+            webpage
+        )
+        width = height = None
+        if m:
+            width, height = int(m.group('width')), int(m.group('height'))
+
+        formats = [{
+            'format_id': 'sd',
+            'url': video_url,
+            'width': width,
+            'height': height,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'duration': duration,
+            'formats': formats,
+        }
index 13e7e71cb37b4d7b5ec2e5ab2c341551e7e05f28..9bd5defa7ac5e171904eb681015e9bcf1661acb9 100644 (file)
@@ -267,6 +267,14 @@ class SmotriCommunityIE(InfoExtractor):
     IE_DESC = 'Smotri.com community videos'
     IE_NAME = 'smotri:community'
     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
+    _TEST = {
+        'url': 'http://smotri.com/community/video/kommuna',
+        'info_dict': {
+            'id': 'kommuna',
+            'title': 'КПРФ',
+        },
+        'playlist_mincount': 4,
+    }
     
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -289,6 +297,14 @@ class SmotriUserIE(InfoExtractor):
     IE_DESC = 'Smotri.com user videos'
     IE_NAME = 'smotri:user'
     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
+    _TESTS = [{
+        'url': 'http://smotri.com/user/inspector',
+        'info_dict': {
+            'id': 'inspector',
+            'title': 'Inspector',
+        },
+        'playlist_mincount': 9,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index dc9f8055013170a87a447ee6370f5cae4546174d..c663e56d42ed02645313637cd7866a9071d10ae7 100644 (file)
@@ -61,7 +61,10 @@ class SockshareIE(InfoExtractor):
             r'<a href="([^"]*)".+class="download_file_link"',
             webpage, 'file url')
         video_url = "http://www.sockshare.com" + video_url
-        title = self._html_search_regex(r'<h1>(.+)<strong>', webpage, 'title')
+        title = self._html_search_regex((
+            r'<h1>(.+)<strong>',
+            r'var name = "([^"]+)";'),
+            webpage, 'title', default=None)
         thumbnail = self._html_search_regex(
             r'<img\s+src="([^"]*)".+?name="bg"',
             webpage, 'thumbnail')
index 097d0e418d452a968cdf0355419b02c4dd392081..b78aed7f0ed29a32b991c1ba82c6716ec65d9aa4 100644 (file)
@@ -28,7 +28,8 @@ class SoundcloudIE(InfoExtractor):
     _VALID_URL = r'''(?x)^(?:https?://)?
                     (?:(?:(?:www\.|m\.)?soundcloud\.com/
                             (?P<uploader>[\w\d-]+)/
-                            (?!sets/)(?P<title>[\w\d-]+)/?
+                            (?!sets/|likes/?(?:$|[?#]))
+                            (?P<title>[\w\d-]+)/?
                             (?P<token>[^?]+?)?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
                        |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
@@ -221,13 +222,16 @@ class SoundcloudIE(InfoExtractor):
 class SoundcloudSetIE(SoundcloudIE):
     _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
     IE_NAME = 'soundcloud:set'
-    # it's in tests/test_playlists.py
-    _TESTS = []
+    _TESTS = [{
+        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
+        'info_dict': {
+            'title': 'The Royal Concept EP',
+        },
+        'playlist_mincount': 6,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
 
         # extract uploader (which is in the url)
         uploader = mobj.group(1)
@@ -246,20 +250,32 @@ class SoundcloudSetIE(SoundcloudIE):
                 self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
             return
 
-        self.report_extraction(full_title)
-        return {'_type': 'playlist',
-                'entries': [self._extract_info_dict(track) for track in info['tracks']],
-                'id': info['id'],
-                'title': info['title'],
-                }
+        return {
+            '_type': 'playlist',
+            'entries': [self._extract_info_dict(track) for track in info['tracks']],
+            'id': info['id'],
+            'title': info['title'],
+        }
 
 
 class SoundcloudUserIE(SoundcloudIE):
     _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
     IE_NAME = 'soundcloud:user'
-
-    # it's in tests/test_playlists.py
-    _TESTS = []
+    _TESTS = [{
+        'url': 'https://soundcloud.com/the-concept-band',
+        'info_dict': {
+            'id': '9615865',
+            'title': 'The Royal Concept',
+        },
+        'playlist_mincount': 12,
+    }, {
+        'url': 'https://soundcloud.com/the-concept-band/likes',
+        'info_dict': {
+            'id': '9615865',
+            'title': 'The Royal Concept',
+        },
+        'playlist_mincount': 1,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -301,9 +317,18 @@ class SoundcloudUserIE(SoundcloudIE):
 class SoundcloudPlaylistIE(SoundcloudIE):
     _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
     IE_NAME = 'soundcloud:playlist'
+    _TESTS = [
 
-     # it's in tests/test_playlists.py
-    _TESTS = []
+        {
+            'url': 'http://api.soundcloud.com/playlists/4110309',
+            'info_dict': {
+                'id': '4110309',
+                'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
+                'description': 're:.*?TILT Brass - Bowery Poetry Club',
+            },
+            'playlist_count': 6,
+        }
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index 340a38440d02ad28b5eb6ab19916eee870818c35..9ed7d3b39e227806971fe98f43e1c1018b84ad3c 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import compat_urlparse
 
 
 class SpiegelIE(InfoExtractor):
@@ -28,16 +29,6 @@ class SpiegelIE(InfoExtractor):
             'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
             'duration': 983,
         },
-    }, {
-        'url': 'http://www.spiegel.de/video/johann-westhauser-videobotschaft-des-hoehlenforschers-video-1502367.html',
-        'md5': '54f58ba0e752e3c07bc2a26222dd0acf',
-        'info_dict': {
-            'id': '1502367',
-            'ext': 'mp4',
-            'title': 'Videobotschaft: Höhlenforscher Westhauser dankt seinen Rettern',
-            'description': 'md5:c6f1ec11413ebd1088b6813943e5fc91',
-            'duration': 42,
-        },
     }]
 
     def _real_extract(self, url):
@@ -82,3 +73,34 @@ class SpiegelIE(InfoExtractor):
             'duration': duration,
             'formats': formats,
         }
+
+
+class SpiegelArticleIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
+    IE_NAME = 'Spiegel:Article'
+    IE_DESC = 'Articles on spiegel.de'
+    _TEST = {
+        'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
+        'info_dict': {
+            'id': '1516455',
+            'ext': 'mp4',
+            'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
+            'description': 're:^Patrick Kämnitz gehört.{100,}',
+        },
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        video_link = self._search_regex(
+            r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
+            'video page URL')
+        video_url = compat_urlparse.urljoin(
+            self.http_scheme() + '//spiegel.de/', video_link)
+
+        return {
+            '_type': 'url',
+            'url': video_url,
+        }
diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py
new file mode 100644 (file)
index 0000000..185353b
--- /dev/null
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+    parse_iso8601,
+)
+
+
+class SportDeutschlandIE(InfoExtractor):
+    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
+    _TESTS = [{
+        'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
+        'info_dict': {
+            'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
+            'ext': 'mp4',
+            'title': 'LIVE: Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
+            'categories': ['Badminton'],
+            'view_count': int,
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 're:^Die Badminton-WM 2014 aus Kopenhagen LIVE',
+            'timestamp': int,
+            'upload_date': 're:^201408[23][0-9]$',
+        },
+        'params': {
+            'skip_download': 'Live stream',
+        },
+    }, {
+        'url': 'http://sportdeutschland.tv/li-ning-badminton-wm-2014/lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
+        'info_dict': {
+            'id': 'lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
+            'ext': 'mp4',
+            'upload_date': '20140825',
+            'description': 'md5:60a20536b57cee7d9a4ec005e8687504',
+            'timestamp': 1408976060,
+            'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'view_count': int,
+            'categories': ['Li-Ning Badminton WM 2014'],
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        sport_id = mobj.group('sport')
+
+        api_url = 'http://splink.tv/api/permalinks/%s/%s' % (
+            sport_id, video_id)
+        req = compat_urllib_request.Request(api_url, headers={
+            'Accept': 'application/vnd.vidibus.v2.html+json',
+            'Referer': url,
+        })
+        data = self._download_json(req, video_id)
+
+        categories = list(data.get('section', {}).get('tags', {}).values())
+        asset = data['asset']
+
+        formats = []
+        smil_url = asset['video']
+        if '.smil' in smil_url:
+            m3u8_url = smil_url.replace('.smil', '.m3u8')
+            formats.extend(
+                self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
+
+            smil_doc = self._download_xml(
+                smil_url, video_id, note='Downloading SMIL metadata')
+            base_url = smil_doc.find('./head/meta').attrib['base']
+            formats.extend([{
+                'format_id': 'rmtp',
+                'url': base_url,
+                'play_path': n.attrib['src'],
+                'ext': 'flv',
+                'preference': -100,
+                'format_note': 'Seems to fail at example stream',
+            } for n in smil_doc.findall('./body/video')])
+        else:
+            formats.append({'url': smil_url})
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': asset['title'],
+            'thumbnail': asset.get('image'),
+            'description': asset.get('teaser'),
+            'categories': categories,
+            'view_count': asset.get('views'),
+            'rtmp_live': asset.get('live'),
+            'timestamp': parse_iso8601(asset.get('date')),
+        }
+
diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py
new file mode 100644 (file)
index 0000000..7de3c9d
--- /dev/null
@@ -0,0 +1,70 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    int_or_none,
+    qualities,
+    determine_ext,
+)
+
+
+class SunPornoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?sunporno\.com/videos/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.sunporno.com/videos/807778/',
+        'md5': '6457d3c165fd6de062b99ef6c2ff4c86',
+        'info_dict': {
+            'id': '807778',
+            'ext': 'flv',
+            'title': 'md5:0a400058e8105d39e35c35e7c5184164',
+            'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 302,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+        description = self._html_search_meta('description', webpage, 'description')
+        thumbnail = self._html_search_regex(
+            r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
+
+        duration = parse_duration(self._search_regex(
+            r'<span>Duration: (\d+:\d+)</span>', webpage, 'duration', fatal=False))
+
+        view_count = int_or_none(self._html_search_regex(
+            r'<span class="views">(\d+)</span>', webpage, 'view count', fatal=False))
+        comment_count = int_or_none(self._html_search_regex(
+            r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))
+
+        formats = []
+        quality = qualities(['mp4', 'flv'])
+        for video_url in re.findall(r'<source src="([^"]+)"', webpage):
+            video_ext = determine_ext(video_url)
+            formats.append({
+                'url': video_url,
+                'format_id': video_ext,
+                'quality': quality(video_ext),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'formats': formats,
+            'age_limit': 18,
+        }
index 5d9d703673265ca4a53a54f28e34494d570cb206..13c6ea67728d040a9e1f17111031952492d921b5 100644 (file)
@@ -52,20 +52,6 @@ class SWRMediathekIE(InfoExtractor):
             'uploader': 'SWR 2',
             'uploader_id': '284670',
         }
-    }, {
-        'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6',
-        'md5': '881531487d0633080a8cc88d31ef896f',
-        'info_dict': {
-            'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6',
-            'ext': 'mp4',
-            'title': 'Familienspaß am Bodensee',
-            'description': 'md5:0b591225a32cfde7be1629ed49fe4315',
-            'thumbnail': 're:http://.*\.jpg',
-            'duration': 1784,
-            'upload_date': '20140727',
-            'uploader': 'SWR Fernsehen BW',
-            'uploader_id': '281130',
-        }
     }]
 
     def _real_extract(self, url):
index 46d727d1de6743edcb99109b77caa49ebc1bf0c6..8a95fd6563999f1f59808e2b2090ede9ee312f7a 100644 (file)
@@ -106,6 +106,13 @@ class TeacherTubeUserIE(InfoExtractor):
         \s*
         <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
     '''
+    _TEST = {
+        'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
+        'info_dict': {
+            'id': 'rbhagwati2',
+        },
+        'playlist_mincount': 179,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index a55f236cbbca0ac5ef70db2b22eb3c94a778b2c1..16e945d8e624adc51e6a68eab786bdece0a29960 100644 (file)
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor):
     _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
 
     _TEST = {
-        u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
-        u'playlist': [
+        'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
+        'info_dict': {
+            'id': '57758',
+            'title': 'Learning Topic Models --- Going beyond SVD',
+        },
+        'playlist': [
             {
-                u'file': u'57758.flv',
-                u'info_dict': {
-                    u'title': u'Learning Topic Models --- Going beyond SVD',
+                'info_dict': {
+                    'id': '57758',
+                    'ext': 'flv',
+                    'title': 'Learning Topic Models --- Going beyond SVD',
                 },
             },
             {
-                u'file': u'57758-slides.flv',
-                u'info_dict': {
-                    u'title': u'Learning Topic Models --- Going beyond SVD',
+                'info_dict': {
+                    'id': '57758-slides',
+                    'ext': 'flv',
+                    'title': 'Learning Topic Models --- Going beyond SVD',
                 },
             },
         ],
-        u'params': {
+        'params': {
             # rtmp download
-            u'skip_download': True,
+            'skip_download': True,
         },
     }
 
@@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         talk_id = mobj.group('id')
         webpage = self._download_webpage(url, talk_id)
-        rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
-            u'rtmp url')
-        play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
-            webpage, u'presenter play path')
+        rtmp_url = self._search_regex(
+            r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
+        play_path = self._search_regex(
+            r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
+            webpage, 'presenter play path')
         title = clean_html(get_element_by_attribute('class', 'title', webpage))
         video_info = {
-                'id': talk_id,
-                'title': title,
-                'url': rtmp_url,
-                'play_path': play_path,
-                'ext': 'flv',
-            }
+            'id': talk_id,
+            'title': title,
+            'url': rtmp_url,
+            'play_path': play_path,
+            'ext': 'flv',
+        }
         m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
         if m_slides is None:
             return video_info
         else:
-            return [
-                video_info,
-                # The slides video
-                {
-                    'id': talk_id + '-slides',
-                    'title': title,
-                    'url': rtmp_url,
-                    'play_path': m_slides.group(1),
-                    'ext': 'flv',
-                },
-            ]
+            return {
+                '_type': 'playlist',
+                'id': talk_id,
+                'title': title,
+                'entries': [
+                    video_info,
+                    # The slides video
+                    {
+                        'id': talk_id + '-slides',
+                        'title': title,
+                        'url': rtmp_url,
+                        'play_path': m_slides.group(1),
+                        'ext': 'flv',
+                    },
+                ],
+            }
index bce32a87330731e229c17e267ca7f65342d22952..1cca47771290beaa2d4090126e181afe4059f460 100644 (file)
@@ -51,7 +51,6 @@ class TEDIE(SubtitlesInfoExtractor):
         }
     }, {
         'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
-        'md5': '49144e345a899b8cb34d315f3b9cfeeb',
         'info_dict': {
             'id': '1972',
             'ext': 'mp4',
@@ -59,6 +58,13 @@ class TEDIE(SubtitlesInfoExtractor):
             'uploader': 'Gabby Giffords and Mark Kelly',
             'description': 'md5:5174aed4d0f16021b704120360f72b92',
         },
+    }, {
+        'url': 'http://www.ted.com/playlists/who_are_the_hackers',
+        'info_dict': {
+            'id': '10',
+            'title': 'Who are the hackers?',
+        },
+        'playlist_mincount': 6,
     }]
 
     _NATIVE_FORMATS = {
diff --git a/youtube_dl/extractor/telemb.py b/youtube_dl/extractor/telemb.py
new file mode 100644 (file)
index 0000000..1bbd0e7
--- /dev/null
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import remove_start
+
+
+class TeleMBIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?telemb\.be/(?P<display_id>.+?)_d_(?P<id>\d+)\.html'
+    _TESTS = [
+        {
+            'url': 'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html',
+            'md5': 'f45ea69878516ba039835794e0f8f783',
+            'info_dict': {
+                'id': '13466',
+                'display_id': 'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-',
+                'ext': 'mp4',
+                'title': 'Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages',
+                'description': 'md5:bc5225f47b17c309761c856ad4776265',
+                'thumbnail': 're:^http://.*\.(?:jpg|png)$',
+            }
+        },
+        {
+            # non-ASCII characters in download URL
+            'url': 'http://telemb.be/les-reportages-havre-incendie-mortel_d_13514.html',
+            'md5': '6e9682736e5ccd4eab7f21e855350733',
+            'info_dict': {
+                'id': '13514',
+                'display_id': 'les-reportages-havre-incendie-mortel',
+                'ext': 'mp4',
+                'title': 'Havré - Incendie mortel - Les reportages',
+                'description': 'md5:5e54cb449acb029c2b7734e2d946bd4a',
+                'thumbnail': 're:^http://.*\.(?:jpg|png)$',
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        formats = []
+        for video_url in re.findall(r'file\s*:\s*"([^"]+)"', webpage):
+            fmt = {
+                'url': video_url,
+                'format_id': video_url.split(':')[0]
+            }
+            rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url)
+            if rtmp:
+                fmt.update({
+                    'play_path': rtmp.group('playpath'),
+                    'app': rtmp.group('app'),
+                    'player_url': 'http://p.jwpcdn.com/6/10/jwplayer.flash.swf',
+                    'page_url': 'http://www.telemb.be',
+                    'preference': -1,
+                })
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        title = remove_start(self._og_search_title(webpage), 'TéléMB : ')
+        description = self._html_search_regex(
+            r'<meta property="og:description" content="(.+?)" />',
+            webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py
new file mode 100644 (file)
index 0000000..4956f85
--- /dev/null
@@ -0,0 +1,84 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    fix_xml_ampersands,
+)
+
+
+class TNAFlixIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
+
+    _TITLE_REGEX = None
+    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
+    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
+
+    _TEST = {
+        'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
+        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
+        'info_dict': {
+            'id': '553878',
+            'display_id': 'Carmella-Decesare-striptease',
+            'ext': 'mp4',
+            'title': 'Carmella Decesare - striptease',
+            'description': '',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'duration': 91,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
+        description = self._html_search_regex(
+            self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='')
+
+        age_limit = self._rta_search(webpage)
+
+        duration = self._html_search_meta('duration', webpage, 'duration', default=None)
+        if duration:
+            duration = parse_duration(duration[1:])
+
+        cfg_url = self._html_search_regex(
+            self._CONFIG_REGEX, webpage, 'flashvars.config')
+
+        cfg_xml = self._download_xml(
+            cfg_url, display_id, note='Downloading metadata',
+            transform_source=fix_xml_ampersands)
+
+        thumbnail = cfg_xml.find('./startThumb').text
+
+        formats = []
+        for item in cfg_xml.findall('./quality/item'):
+            video_url = re.sub(r'speed=\d+', 'speed=', item.find('videoLink').text)
+            format_id = item.find('res').text
+            fmt = {
+                'url': video_url,
+                'format_id': format_id,
+            }
+            m = re.search(r'^(\d+)', format_id)
+            if m:
+                fmt['height'] = int(m.group(1))
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
index 0f389bd93a1f35eb35346f7ee99b0b91a9c9b876..2756f56d3a94ae8f2bed64aa39acf4d45616366b 100644 (file)
@@ -42,6 +42,13 @@ class ToypicsIE(InfoExtractor):
 class ToypicsUserIE(InfoExtractor):
     IE_DESC = 'Toypics user profile'
     _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+    _TEST = {
+        'url': 'http://videos.toypics.net/Mikey',
+        'info_dict': {
+            'id': 'Mikey',
+        },
+        'playlist_mincount': 19,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index 7a3891b89b736fb05f4c09d441d7eb56e68d8dcd..dcd823d0838dca23b27298cbf05ad47cc4261637 100644 (file)
@@ -1,5 +1,7 @@
 # coding: utf-8
 
+from __future__ import unicode_literals
+
 import re
 import json
 
@@ -9,22 +11,29 @@ from .common import InfoExtractor
 class TudouIE(InfoExtractor):
     _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
     _TESTS = [{
-        u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
-        u'file': u'159448201.f4v',
-        u'md5': u'140a49ed444bd22f93330985d8475fcb',
-        u'info_dict': {
-            u"title": u"卡马乔国足开大脚长传冲吊集锦"
+        'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
+        'md5': '140a49ed444bd22f93330985d8475fcb',
+        'info_dict': {
+            'id': '159448201',
+            'ext': 'f4v',
+            'title': '卡马乔国足开大脚长传冲吊集锦',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
+        'info_dict': {
+            'id': '117049447',
+            'ext': 'f4v',
+            'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
+            'thumbnail': 're:^https?://.*\.jpg$',
         }
-    },
-    {
-        u'url': u'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html',
-        u'file': u'todo.mp4',
-        u'md5': u'todo.mp4',
-        u'info_dict': {
-            u'title': u'todo.mp4',
+    }, {
+        'url': 'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html',
+        'info_dict': {
+            'title': 'todo.mp4',
         },
-        u'add_ie': [u'Youku'],
-        u'skip': u'Only works from China'
+        'add_ie': ['Youku'],
+        'skip': 'Only works from China'
     }]
 
     def _url_for_id(self, id, quality = None):
@@ -44,20 +53,22 @@ class TudouIE(InfoExtractor):
         if m and m.group(1):
             return {
                 '_type': 'url',
-                'url': u'youku:' + m.group(1),
+                'url': 'youku:' + m.group(1),
                 'ie_key': 'Youku'
             }
 
         title = self._search_regex(
-            r",kw:\s*['\"](.+?)[\"']", webpage, u'title')
+            r",kw:\s*['\"](.+?)[\"']", webpage, 'title')
         thumbnail_url = self._search_regex(
-            r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False)
+            r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False)
 
         segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
         segments = json.loads(segs_json)
         # It looks like the keys are the arguments that have to be passed as
         # the hd field in the request url, we pick the higher
-        quality = sorted(segments.keys())[-1]
+        # Also, filter non-number qualities (see issue #3643).
+        quality = sorted(filter(lambda k: k.isdigit(), segments.keys()),
+                         key=lambda k: int(k))[-1]
         parts = segments[quality]
         result = []
         len_parts = len(parts)
@@ -67,12 +78,13 @@ class TudouIE(InfoExtractor):
             part_id = part['k']
             final_url = self._url_for_id(part_id, quality)
             ext = (final_url.split('?')[0]).split('.')[-1]
-            part_info = {'id': part_id,
-                          'url': final_url,
-                          'ext': ext,
-                          'title': title,
-                          'thumbnail': thumbnail_url,
-                          }
+            part_info = {
+                'id': '%s' % part_id,
+                'url': final_url,
+                'ext': ext,
+                'title': title,
+                'thumbnail': thumbnail_url,
+            }
             result.append(part_info)
 
         return result
index 2882c1809e0bd55c1e6c8b441c19293aeb64d301..306fe89741cce8b3c281c94349be266f221028b3 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import (
 
 
 class TumblrIE(InfoExtractor):
-    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)'
+    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
     _TESTS = [{
         'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
         'md5': '479bb068e5b16462f5176a6828829767',
@@ -56,13 +56,15 @@ class TumblrIE(InfoExtractor):
 
         # The only place where you can get a title, it's not complete,
         # but searching in other places doesn't work for all videos
-        video_title = self._html_search_regex(r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
-            webpage, 'title', flags=re.DOTALL)
+        video_title = self._html_search_regex(
+            r'(?s)<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
+            webpage, 'title')
 
-        return [{'id': video_id,
-                 'url': video_url,
-                 'title': video_title,
-                 'description': self._html_search_meta('description', webpage),
-                 'thumbnail': video_thumbnail,
-                 'ext': ext
-                 }]
+        return {
+            'id': video_id,
+             'url': video_url,
+             'title': video_title,
+             'description': self._html_search_meta('description', webpage),
+             'thumbnail': video_thumbnail,
+             'ext': ext,
+        }
diff --git a/youtube_dl/extractor/turbo.py b/youtube_dl/extractor/turbo.py
new file mode 100644 (file)
index 0000000..29703a8
--- /dev/null
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    qualities,
+    xpath_text,
+)
+
+
+class TurboIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P<id>[0-9]+)-'
+    _API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}'
+    _TEST = {
+        'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
+        'md5': '33f4b91099b36b5d5a91f84b5bcba600',
+        'info_dict': {
+            'id': '454443',
+            'ext': 'mp4',
+            'duration': 3715,
+            'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
+            'description': 'Retrouvez dans cette rubrique toutes les vidéos de l\'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        playlist = self._download_xml(self._API_URL.format(video_id), video_id)
+        item = playlist.find('./channel/item')
+        if item is None:
+            raise ExtractorError('Playlist item was not found', expected=True)
+
+        title = xpath_text(item, './title', 'title')
+        duration = int_or_none(xpath_text(item, './durate', 'duration'))
+        thumbnail = xpath_text(item, './visuel_clip', 'thumbnail')
+        description = self._og_search_description(webpage)
+
+        formats = []
+        get_quality = qualities(['3g', 'sd', 'hq'])
+        for child in item:
+            m = re.search(r'url_video_(?P<quality>.+)', child.tag)
+            if m:
+                quality = m.group('quality')
+                formats.append({
+                    'format_id': quality,
+                    'url': child.text,
+                    'quality': get_quality(quality),
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'description': description,
+            'formats': formats,
+        }
index 0921cc5f822f5bf0bcfefae8d6ef063e88f6e29d..dc86978509da2b8680fd37bdd912028366c3928b 100644 (file)
@@ -5,80 +5,82 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
-    clean_html,
-    int_or_none,
+    float_or_none,
+    str_to_int,
 )
 
 
 class TvigleIE(InfoExtractor):
     IE_NAME = 'tvigle'
     IE_DESC = 'Интернет-телевидение Tvigle.ru'
-    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'
+    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'
 
     _TESTS = [
         {
-            'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
-            'md5': '09afba4616666249f087efc6dcf83cb3',
+            'url': 'http://www.tvigle.ru/video/brat-2/',
+            'md5': '72cb7eab33e54314e1790da402d3c9c3',
             'info_dict': {
-                'id': '503081',
-                'ext': 'flv',
+                'id': '5119390',
+                'display_id': 'brat-2',
+                'ext': 'mp4',
                 'title': 'Брат 2 ',
-                'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
-                'upload_date': '20110919',
+                'description': 'md5:5751f4fe345a58e1692585c361294bd8',
+                'duration': 7356.369,
+                'age_limit': 0,
             },
         },
         {
-            'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
-            'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
+            'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
+            'md5': 'd9012d7c7c598fe7a11d7fb46dc1f574',
             'info_dict': {
-                'id': '676433',
-                'ext': 'flv',
+                'id': '5142516',
+                'ext': 'mp4',
                 'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
                 'description': 'md5:027f7dc872948f14c96d19b4178428a4',
-                'upload_date': '20121218',
+                'duration': 186.080,
+                'age_limit': 0,
             },
         },
     ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
 
-        video_data = self._download_xml(
-            'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id, video_id, 'Downloading video XML')
+        webpage = self._download_webpage(url, display_id)
 
-        video = video_data.find('./video')
+        video_id = self._html_search_regex(
+            r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id')
 
-        title = video.get('name')
-        description = video.get('anons')
-        if description:
-            description = clean_html(description)
-        thumbnail = video_data.get('img')
-        upload_date = unified_strdate(video.get('date'))
-        like_count = int_or_none(video.get('vtp'))
+        video_data = self._download_json(
+            'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
 
-        formats = []
-        for num, (format_id, format_note) in enumerate([['low_file', 'SQ'], ['file', 'HQ'], ['hd', 'HD 720']]):
-            video_url = video.get(format_id)
-            if not video_url:
-                continue
-            formats.append({
-                'url': video_url,
-                'format_id': format_id,
-                'format_note': format_note,
-                'quality': num,
-            })
+        item = video_data['playlist']['items'][0]
+
+        title = item['title']
+        description = item['description']
+        thumbnail = item['thumbnail']
+        duration = float_or_none(item['durationMilliseconds'], 1000)
+        age_limit = str_to_int(item['ageRestrictions'])
 
+        formats = []
+        for vcodec, fmts in item['videos'].items():
+            for quality, video_url in fmts.items():
+                formats.append({
+                    'url': video_url,
+                    'format_id': '%s-%s' % (vcodec, quality),
+                    'vcodec': vcodec,
+                    'height': int(quality[:-1]),
+                })
         self._sort_formats(formats)
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
-            'upload_date': upload_date,
-            'like_count': like_count,
-            'age_limit': 18,
+            'duration': duration,
+            'age_limit': age_limit,
             'formats': formats,
         }
\ No newline at end of file
index a56a7ab5fc2e1c307c9811687ca03b0f4d79e6e5..445e0ec419ccc7eb2e23e522f6f3eba6010dcd69 100644 (file)
@@ -6,13 +6,28 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    compat_str,
     parse_iso8601,
     qualities,
 )
 
 
 class TVPlayIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P<id>\d+)'
+    IE_DESC = 'TV3Play and related services'
+    _VALID_URL = r'''(?x)http://(?:www\.)?
+        (?:tvplay\.lv/parraides|
+           tv3play\.lt/programos|
+           tv3play\.ee/sisu|
+           tv3play\.se/program|
+           tv6play\.se/program|
+           tv8play\.se/program|
+           tv10play\.se/program|
+           tv3play\.no/programmer|
+           viasat4play\.no/programmer|
+           tv6play\.no/programmer|
+           tv3play\.dk/programmer|
+        )/[^/]+/(?P<id>\d+)
+        '''
     _TESTS = [
         {
             'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
@@ -30,6 +45,134 @@ class TVPlayIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.tv3play.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
+            'info_dict': {
+                'id': '409229',
+                'ext': 'flv',
+                'title': 'Moterys meluoja geriau',
+                'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e',
+                'duration': 1330,
+                'timestamp': 1403769181,
+                'upload_date': '20140626',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv3play.ee/sisu/kodu-keset-linna/238551?autostart=true',
+            'info_dict': {
+                'id': '238551',
+                'ext': 'flv',
+                'title': 'Kodu keset linna 398537',
+                'description': 'md5:7df175e3c94db9e47c0d81ffa5d68701',
+                'duration': 1257,
+                'timestamp': 1292449761,
+                'upload_date': '20101215',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true',
+            'info_dict': {
+                'id': '395385',
+                'ext': 'flv',
+                'title': 'Husräddarna S02E07',
+                'description': 'md5:f210c6c89f42d4fc39faa551be813777',
+                'duration': 2574,
+                'timestamp': 1400596321,
+                'upload_date': '20140520',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true',
+            'info_dict': {
+                'id': '266636',
+                'ext': 'flv',
+                'title': 'Den sista dokusåpan S01E08',
+                'description': 'md5:295be39c872520221b933830f660b110',
+                'duration': 1492,
+                'timestamp': 1330522854,
+                'upload_date': '20120229',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true',
+            'info_dict': {
+                'id': '282756',
+                'ext': 'flv',
+                'title': 'Antikjakten S01E10',
+                'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8',
+                'duration': 2646,
+                'timestamp': 1348575868,
+                'upload_date': '20120925',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true',
+            'info_dict': {
+                'id': '230898',
+                'ext': 'flv',
+                'title': 'Anna Anka søker assistent - Ep. 8',
+                'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474',
+                'duration': 2656,
+                'timestamp': 1277720005,
+                'upload_date': '20100628',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true',
+            'info_dict': {
+                'id': '21873',
+                'ext': 'flv',
+                'title': 'Budbringerne program 10',
+                'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d',
+                'duration': 1297,
+                'timestamp': 1254205102,
+                'upload_date': '20090929',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true',
+            'info_dict': {
+                'id': '361883',
+                'ext': 'flv',
+                'title': 'Hotelinspektør Alex Polizzi - Ep. 10',
+                'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81',
+                'duration': 2594,
+                'timestamp': 1393236292,
+                'upload_date': '20140224',
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -49,7 +192,7 @@ class TVPlayIE(InfoExtractor):
         quality = qualities(['hls', 'medium', 'high'])
         formats = []
         for format_id, video_url in streams['streams'].items():
-            if not video_url:
+            if not video_url or not isinstance(video_url, compat_str):
                 continue
             fmt = {
                 'format_id': format_id,
index 474610eec79483da01c14ca3e1d985b7aa8fd49a..f70978299ac9e682f5cdb99a7396541fd08c115c 100644 (file)
@@ -1,32 +1,66 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
+from ..utils import qualities
+
 
 class UnistraIE(InfoExtractor):
-    _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
-
-    _TEST = {
-        u'url': u'http://utv.unistra.fr/video.php?id_video=154',
-        u'file': u'154.mp4',
-        u'md5': u'736f605cfdc96724d55bb543ab3ced24',
-        u'info_dict': {
-            u'title': u'M!ss Yella',
-            u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
+    _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
+
+    _TESTS = [
+        {
+            'url': 'http://utv.unistra.fr/video.php?id_video=154',
+            'md5': '736f605cfdc96724d55bb543ab3ced24',
+            'info_dict': {
+                'id': '154',
+                'ext': 'mp4',
+                'title': 'M!ss Yella',
+                'description': 'md5:104892c71bd48e55d70b902736b81bbf',
+            },
         },
-    }
+        {
+            'url': 'http://utv.unistra.fr/index.php?id_video=437',
+            'md5': '1ddddd6cccaae76f622ce29b8779636d',
+            'info_dict': {
+                'id': '437',
+                'ext': 'mp4',
+                'title': 'Prix Louise Weiss 2014',
+                'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a',
+            },
+        }
+    ]
 
     def _real_extract(self, url):
-        id = re.match(self._VALID_URL, url).group(1)
-        webpage = self._download_webpage(url, id)
-        file = re.search(r'file: "(.*?)",', webpage).group(1)
-        title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
 
-        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file
+        webpage = self._download_webpage(url, video_id)
 
-        return {'id': id,
-                'title': title,
-                'ext': 'mp4',
-                'url': video_url,
-                'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
-                'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
-                }
+        files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage))
+
+        quality = qualities(['SD', 'HD'])
+        formats = []
+        for file_path in files:
+            format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD'
+            formats.append({
+                'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path,
+                'format_id': format_id,
+                'quality': quality(format_id)
+            })
+
+        title = self._html_search_regex(
+            r'<title>UTV - (.*?)</', webpage, 'title')
+        description = self._html_search_regex(
+            r'<meta name="Description" content="(.*?)"', webpage, 'description', flags=re.DOTALL)
+        thumbnail = self._search_regex(
+            r'image: "(.*?)"', webpage, 'thumbnail')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats
+        }
index 488b10df96e298c683cd02287e2da0c49f21a1cc..994b60a76b88ef4d7ff8be630c2bafbd989e2c96 100644 (file)
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
@@ -68,21 +67,36 @@ class UstreamIE(InfoExtractor):
 class UstreamChannelIE(InfoExtractor):
     _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
     IE_NAME = 'ustream:channel'
+    _TEST = {
+        'url': 'http://www.ustream.tv/channel/channeljapan',
+        'info_dict': {
+            'id': '10874166',
+        },
+        'playlist_mincount': 54,
+    }
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
-        slug = m.group('slug')
-        webpage = self._download_webpage(url, slug)
+        display_id = m.group('slug')
+        webpage = self._download_webpage(url, display_id)
         channel_id = get_meta_content('ustream:channel_id', webpage)
 
         BASE = 'http://www.ustream.tv'
         next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
         video_ids = []
         while next_url:
-            reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
+            reply = self._download_json(
+                compat_urlparse.urljoin(BASE, next_url), display_id,
+                note='Downloading video information (next: %d)' % (len(video_ids) + 1))
             video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
             next_url = reply['nextUrl']
 
-        urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
-        url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
-        return self.playlist_result(url_entries, channel_id)
+        entries = [
+            self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
+            for vid in video_ids]
+        return {
+            '_type': 'playlist',
+            'id': channel_id,
+            'display_id': display_id,
+            'entries': entries,
+        }
index b1c854a646c601d4dadaa1dce7fab8d6fc315b3d..77b1f91ce3636cbb8f805f60ff06d95852cf4940 100644 (file)
@@ -16,8 +16,9 @@ class VeeHDIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://veehd.com/video/4686958',
-        'file': '4686958.mp4',
         'info_dict': {
+            'id': '4686958',
+            'ext': 'mp4',
             'title': 'Time Lapse View from Space ( ISS)',
             'uploader_id': 'spotted',
             'description': 'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
new file mode 100644 (file)
index 0000000..7d27d6c
--- /dev/null
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class VGTVIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/(?:.*)/(?P<id>[0-9]+)'
+    _TESTS = [
+        {
+            # streamType: vod
+            'url': 'http://www.vgtv.no/#!/video/84196/hevnen-er-soet-episode-10-abu',
+            'md5': 'b8be7a234cebb840c0d512c78013e02f',
+            'info_dict': {
+                'id': '84196',
+                'ext': 'mp4',
+                'title': 'Hevnen er søt episode 10: Abu',
+                'description': 'md5:e25e4badb5f544b04341e14abdc72234',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'duration': 648.000,
+                'timestamp': 1404626400,
+                'upload_date': '20140706',
+                'view_count': int,
+            },
+        },
+        {
+            # streamType: wasLive
+            'url': 'http://www.vgtv.no/#!/live/100764/opptak-vgtv-foelger-em-kvalifiseringen',
+            'info_dict': {
+                'id': '100764',
+                'ext': 'mp4',
+                'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
+                'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'duration': 9056.000,
+                'timestamp': 1410113864,
+                'upload_date': '20140907',
+                'view_count': int,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+        {
+            # streamType: live
+            'url': 'http://www.vgtv.no/#!/live/100015/direkte-her-kan-du-se-laksen-live-fra-suldalslaagen',
+            'info_dict': {
+                'id': '100015',
+                'ext': 'mp4',
+                'title': 'DIREKTE: Her kan du se laksen live fra Suldalslågen!',
+                'description': 'md5:9a60cc23fa349f761628924e56eeec2d',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'duration': 0,
+                'timestamp': 1407423348,
+                'upload_date': '20140807',
+                'view_count': int,
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        data = self._download_json(
+            'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,
+            video_id, 'Downloading media JSON')
+
+        streams = data['streamUrls']
+
+        formats = []
+
+        hls_url = streams.get('hls')
+        if hls_url:
+            formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4'))
+
+        hds_url = streams.get('hds')
+        if hds_url:
+            formats.extend(self._extract_f4m_formats(hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id))
+
+        mp4_url = streams.get('mp4')
+        if mp4_url:
+            _url = hls_url or hds_url
+            MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1])
+            for mp4_format in _url.split(','):
+                m = re.search('(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format)
+                if not m:
+                    continue
+                width = int(m.group('width'))
+                height = int(m.group('height'))
+                vbr = int(m.group('vbr'))
+                formats.append({
+                    'url': MP4_URL_TEMPLATE % mp4_format,
+                    'format_id': 'mp4-%s' % vbr,
+                    'width': width,
+                    'height': height,
+                    'vbr': vbr,
+                    'preference': 1,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': data['title'],
+            'description': data['description'],
+            'thumbnail': data['images']['main'] + '?t[]=900x506q80',
+            'timestamp': data['published'],
+            'duration': float_or_none(data['duration'], 1000),
+            'view_count': data['displays'],
+            'formats': formats,
+        }
\ No newline at end of file
index 55f6cd0d8e1d9e7de2fd49f0e123b23e47496f13..bc01d7fbf583eb44ccc708c7fb00b4b113f82bc0 100644 (file)
@@ -57,6 +57,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
         (?P<proto>(?:https?:)?//)?
         (?:(?:www|(?P<player>player))\.)?
         vimeo(?P<pro>pro)?\.com/
+        (?!channels/[^/?#]+/?(?:$|[?#])|album/)
         (?:.*?/)?
         (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
         (?:videos?/)?
@@ -151,30 +152,8 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 'duration': 62,
             }
         },
-        {
-            'note': 'video player needs Referer',
-            'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
-            'md5': '6295fdab8f4bf6a002d058b2c6dce276',
-            'info_dict': {
-                'id': '91613211',
-                'ext': 'mp4',
-                'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
-                'uploader': 'DevWeek Events',
-                'duration': 2773,
-                'thumbnail': 're:^https?://.*\.jpg$',
-            }
-        }
     ]
 
-    @classmethod
-    def suitable(cls, url):
-        if VimeoChannelIE.suitable(url):
-            # Otherwise channel urls like http://vimeo.com/channels/31259 would
-            # match
-            return False
-        else:
-            return super(VimeoIE, cls).suitable(url)
-
     def _verify_video_password(self, url, video_id, webpage):
         password = self._downloader.params.get('videopassword', None)
         if password is None:
@@ -393,9 +372,16 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
 
 class VimeoChannelIE(InfoExtractor):
     IE_NAME = 'vimeo:channel'
-    _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$'
+    _VALID_URL = r'https?://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
     _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
     _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
+    _TESTS = [{
+        'url': 'http://vimeo.com/channels/tributes',
+        'info_dict': {
+            'title': 'Vimeo Tributes',
+        },
+        'playlist_mincount': 25,
+    }]
 
     def _page_url(self, base_url, pagenum):
         return '%s/videos/page:%d/' % (base_url, pagenum)
@@ -429,14 +415,15 @@ class VimeoChannelIE(InfoExtractor):
 
 class VimeoUserIE(VimeoChannelIE):
     IE_NAME = 'vimeo:user'
-    _VALID_URL = r'(?:https?://)?vimeo\.com/(?P<name>[^/]+)(?:/videos|[#?]|$)'
+    _VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
     _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
-
-    @classmethod
-    def suitable(cls, url):
-        if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url) or VimeoGroupsIE.suitable(url):
-            return False
-        return super(VimeoUserIE, cls).suitable(url)
+    _TESTS = [{
+        'url': 'http://vimeo.com/nkistudio/videos',
+        'info_dict': {
+            'title': 'Nki',
+        },
+        'playlist_mincount': 66,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -446,8 +433,15 @@ class VimeoUserIE(VimeoChannelIE):
 
 class VimeoAlbumIE(VimeoChannelIE):
     IE_NAME = 'vimeo:album'
-    _VALID_URL = r'(?:https?://)?vimeo\.com/album/(?P<id>\d+)'
+    _VALID_URL = r'https?://vimeo\.com/album/(?P<id>\d+)'
     _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
+    _TESTS = [{
+        'url': 'http://vimeo.com/album/2632481',
+        'info_dict': {
+            'title': 'Staff Favorites: November 2013',
+        },
+        'playlist_mincount': 13,
+    }]
 
     def _page_url(self, base_url, pagenum):
         return '%s/page:%d/' % (base_url, pagenum)
@@ -461,6 +455,13 @@ class VimeoAlbumIE(VimeoChannelIE):
 class VimeoGroupsIE(VimeoAlbumIE):
     IE_NAME = 'vimeo:group'
     _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'
+    _TESTS = [{
+        'url': 'http://vimeo.com/groups/rolexawards',
+        'info_dict': {
+            'title': 'Rolex Awards for Enterprise',
+        },
+        'playlist_mincount': 73,
+    }]
 
     def _extract_list_title(self, webpage):
         return self._og_search_title(webpage)
@@ -474,8 +475,8 @@ class VimeoGroupsIE(VimeoAlbumIE):
 class VimeoReviewIE(InfoExtractor):
     IE_NAME = 'vimeo:review'
     IE_DESC = 'Review pages on vimeo'
-    _VALID_URL = r'(?:https?://)?vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
-    _TEST = {
+    _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
+    _TESTS = [{
         'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
         'file': '75524534.mp4',
         'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -483,7 +484,19 @@ class VimeoReviewIE(InfoExtractor):
             'title': "DICK HARDWICK 'Comedian'",
             'uploader': 'Richard Hardwick',
         }
-    }
+    }, {
+        'note': 'video player needs Referer',
+        'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
+        'md5': '6295fdab8f4bf6a002d058b2c6dce276',
+        'info_dict': {
+            'id': '91613211',
+            'ext': 'mp4',
+            'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
+            'uploader': 'DevWeek Events',
+            'duration': 2773,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -498,6 +511,10 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
     _VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
     _LOGIN_REQUIRED = True
     _TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
+    _TESTS = [{
+        'url': 'http://vimeo.com/home/watchlater',
+        'only_matching': True,
+    }]
 
     def _real_initialize(self):
         self._login()
index 076c87119943f3879845ccc3aaf74cdbebf73859..e7754158dcde7c44fe5f975f21e17648e445fd2b 100644 (file)
@@ -65,6 +65,13 @@ class VineUserIE(InfoExtractor):
     IE_NAME = 'vine:user'
     _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
     _VINE_BASE_URL = "https://vine.co/"
+    _TEST = {
+        'url': 'https://vine.co/Visa',
+        'info_dict': {
+            'id': 'Visa',
+        },
+        'playlist_mincount': 47,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py
new file mode 100644 (file)
index 0000000..2d23eff
--- /dev/null
@@ -0,0 +1,125 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    str_to_int,
+)
+
+
+class VpornIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
+            'md5': 'facf37c1b86546fa0208058546842c55',
+            'info_dict': {
+                'id': '497944',
+                'display_id': 'violet-on-her-th-birthday',
+                'ext': 'mp4',
+                'title': 'Violet on her 19th birthday',
+                'description': 'Violet dances in front of the camera which is sure to get you horny.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'kileyGrope',
+                'categories': ['Masturbation', 'Teen'],
+                'duration': 393,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+        {
+            'url': 'http://www.vporn.com/female/hana-shower/523564/',
+            'md5': 'ced35a4656198a1664cf2cda1575a25f',
+            'info_dict': {
+                'id': '523564',
+                'display_id': 'hana-shower',
+                'ext': 'mp4',
+                'title': 'Hana Shower',
+                'description': 'Hana showers at the bathroom.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'Hmmmmm',
+                'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
+                'duration': 588,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
+        description = self._html_search_regex(
+            r'<div class="description_txt">(.*?)</div>', webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'thumbnail', fatal=False, default=None)
+        if thumbnail:
+            thumbnail = 'http://www.vporn.com' + thumbnail
+
+        uploader = self._html_search_regex(
+            r'(?s)UPLOADED BY.*?<a href="/user/[^"]+">([^<]+)</a>',
+            webpage, 'uploader', fatal=False)
+
+        categories = re.findall(r'<a href="/cat/[^"]+">([^<]+)</a>', webpage)
+
+        duration = parse_duration(self._search_regex(
+            r'duration (\d+ min \d+ sec)', webpage, 'duration', fatal=False))
+
+        view_count = str_to_int(self._html_search_regex(
+            r'<span>([\d,\.]+) VIEWS</span>', webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._html_search_regex(
+            r'<span id="like" class="n">([\d,\.]+)</span>', webpage, 'like count', fatal=False))
+        dislike_count = str_to_int(self._html_search_regex(
+            r'<span id="dislike" class="n">([\d,\.]+)</span>', webpage, 'dislike count', fatal=False))
+        comment_count = str_to_int(self._html_search_regex(
+            r'<h4>Comments \(<b>([\d,\.]+)</b>\)</h4>', webpage, 'comment count', fatal=False))
+
+        formats = []
+
+        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
+            video_url = video[1]
+            fmt = {
+                'url': video_url,
+                'format_id': video[0],
+            }
+            m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url)
+            if m:
+                fmt.update({
+                    'width': int(m.group('width')),
+                    'height': int(m.group('height')),
+                    'vbr': int(m.group('vbr')),
+                })
+            formats.append(fmt)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'categories': categories,
+            'duration': duration,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'comment_count': comment_count,
+            'age_limit': 18,
+            'formats': formats,
+        }
index cb8f0887de292c2d36f5f2ac698e7321674e4b2d..88bbbb21967c6807c536ecc5b06d8d8f41095219 100644 (file)
@@ -13,6 +13,9 @@ class WashingtonPostIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
     _TEST = {
         'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
+        'info_dict': {
+            'title': 'Sinkhole of bureaucracy',
+        },
         'playlist': [{
             'md5': 'c3f4b4922ffa259243f68e928db2db8c',
             'info_dict': {
index 00b6d1eba33a6686319d47846c34476ce8b387c7..4e8fbde8d6bbb072e7fc3475288c6c2e93360993 100644 (file)
@@ -18,7 +18,6 @@ class XHamsterIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
-            'md5': '8281348b8d3c53d39fffb377d24eac4e',
             'info_dict': {
                 'id': '1509445',
                 'ext': 'mp4',
@@ -31,7 +30,6 @@ class XHamsterIE(InfoExtractor):
         },
         {
             'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
-            'md5': '4cbd8d56708ecb4fb4124c23e4acb81a',
             'info_dict': {
                 'id': '2221348',
                 'ext': 'mp4',
index b293e2665b81b9a486bff2ec91b410da4a6d9998..273d93d9ee544b74f22daa20a195b8e4bc7b05a2 100644 (file)
@@ -77,9 +77,17 @@ class XTubeIE(InfoExtractor):
             'age_limit': 18,
         }
 
+
 class XTubeUserIE(InfoExtractor):
     IE_DESC = 'XTube user profile'
     _VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'
+    _TEST = {
+        'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
+        'info_dict': {
+            'id': 'greenshowers',
+        },
+        'playlist_mincount': 155,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
index 0e3b33b1652fe1242b36cb79d131acb6694066da..3ab6017cdb51a3eaef6a3a1686719fba714780dd 100644 (file)
@@ -71,7 +71,8 @@ class YahooIE(InfoExtractor):
         if items_json is None:
             CONTENT_ID_REGEXES = [
                 r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
-                r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"'
+                r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
+                r'"first_videoid"\s*:\s*"([^"]+)"',
             ]
             long_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
             video_id = long_id
index fcb5ff758deae198614e821dc132871e5fb90679..b86331e3cfa39ec8d3f287e829900b414892beee 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class YouJizzIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
+    _VALID_URL = r'^https?://(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
     _TEST = {
         'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
         'file': '2189178.flv',
index d456c4da522d689ac7bcbd33c5f8a3b1204c3b00..7bfda45e76e0d4ca3b6bfd6c3a8ec9f38de453e1 100644 (file)
@@ -23,7 +23,6 @@ class YouPornIE(InfoExtractor):
     _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
     _TEST = {
         'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
-        'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
         'info_dict': {
             'id': '505835',
             'ext': 'mp4',
index 75044d71a3fd9f81fa5d89ab8283eb13e5d8191d..b54c69122afb1265acc545cd79daa8ffd09a1752 100644 (file)
@@ -1,7 +1,8 @@
 # coding: utf-8
 
-import errno
-import io
+from __future__ import unicode_literals
+
+
 import itertools
 import json
 import os.path
@@ -21,7 +22,6 @@ from ..utils import (
     compat_str,
 
     clean_html,
-    get_cachedir,
     get_element_by_id,
     get_element_by_attribute,
     ExtractorError,
@@ -30,7 +30,6 @@ from ..utils import (
     unescapeHTML,
     unified_strdate,
     orderedSet,
-    write_json_file,
     uppercase_escape,
 )
 
@@ -73,29 +72,29 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             return
 
         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
-                                  login_page, u'Login GALX parameter')
+                                  login_page, 'Login GALX parameter')
 
         # Log in
         login_form_strs = {
-                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
-                u'Email': username,
-                u'GALX': galx,
-                u'Passwd': password,
-
-                u'PersistentCookie': u'yes',
-                u'_utf8': u'霱',
-                u'bgresponse': u'js_disabled',
-                u'checkConnection': u'',
-                u'checkedDomains': u'youtube',
-                u'dnConn': u'',
-                u'pstMsg': u'0',
-                u'rmShown': u'1',
-                u'secTok': u'',
-                u'signIn': u'Sign in',
-                u'timeStmp': u'',
-                u'service': u'youtube',
-                u'uilel': u'3',
-                u'hl': u'en_US',
+                'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+                'Email': username,
+                'GALX': galx,
+                'Passwd': password,
+
+                'PersistentCookie': 'yes',
+                '_utf8': '霱',
+                'bgresponse': 'js_disabled',
+                'checkConnection': '',
+                'checkedDomains': 'youtube',
+                'dnConn': '',
+                'pstMsg': '0',
+                'rmShown': '1',
+                'secTok': '',
+                'signIn': 'Sign in',
+                'timeStmp': '',
+                'service': 'youtube',
+                'uilel': '3',
+                'hl': 'en_US',
         }
 
         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
@@ -136,19 +135,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             timeStmp = match.group(1)
 
             tfa_form_strs = {
-                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
-                u'smsToken': u'',
-                u'smsUserPin': tfa_code,
-                u'smsVerifyPin': u'Verify',
-
-                u'PersistentCookie': u'yes',
-                u'checkConnection': u'',
-                u'checkedDomains': u'youtube',
-                u'pstMsg': u'1',
-                u'secTok': secTok,
-                u'timeStmp': timeStmp,
-                u'service': u'youtube',
-                u'hl': u'en_US',
+                'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+                'smsToken': '',
+                'smsUserPin': tfa_code,
+                'smsVerifyPin': 'Verify',
+
+                'PersistentCookie': 'yes',
+                'checkConnection': '',
+                'checkedDomains': 'youtube',
+                'pstMsg': '1',
+                'secTok': secTok,
+                'timeStmp': timeStmp,
+                'service': 'youtube',
+                'hl': 'en_US',
             }
             tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items())
             tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
@@ -200,10 +199,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
 
 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
-    IE_DESC = u'YouTube.com'
+    IE_DESC = 'YouTube.com'
     _VALID_URL = r"""(?x)^
                      (
-                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
+                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                             (?:www\.)?deturl\.com/www\.youtube\.com/|
                             (?:www\.)?pwnyoutube\.com/|
@@ -221,10 +220,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                              )
                          ))
                          |youtu\.be/                                          # just youtu.be/xxxx
-                         |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
+                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                          )
                      )?                                                       # all until now is optional -> you can pass the naked ID
                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
+                     (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
                      (?(1).+)?                                                # if we found the ID, everything can follow
                      $"""
     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
@@ -304,7 +304,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '_rtmp': {'protocol': 'rtmp'},
     }
 
-    IE_NAME = u'youtube'
+    IE_NAME = 'youtube'
     _TESTS = [
         {
             u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
@@ -316,6 +316,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 u"upload_date": u"20121002",
                 u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
                 u"categories": [u'Science & Technology'],
+                'like_count': int,
+                'dislike_count': int,
             }
         },
         {
@@ -361,7 +363,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             u"info_dict": {
                 u"upload_date": "20121002",
                 u"uploader_id": "8KVIDEO",
-                u"description": "No description available.",
+                u"description": '',
                 u"uploader": "8KVIDEO",
                 u"title": "UHDTV TEST 8K VIDEO.mp4"
             },
@@ -372,30 +374,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         },
         # DASH manifest with encrypted signature
         {
-            u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
-            u'info_dict': {
-                u'id': u'IB3lcPjvWLA',
-                u'ext': u'm4a',
-                u'title': u'Afrojack - The Spark ft. Spree Wilson',
-                u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
-                u'uploader': u'AfrojackVEVO',
-                u'uploader_id': u'AfrojackVEVO',
-                u'upload_date': u'20131011',
+            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
+            'info_dict': {
+                'id': 'IB3lcPjvWLA',
+                'ext': 'm4a',
+                'title': 'Afrojack - The Spark ft. Spree Wilson',
+                'description': 'md5:9717375db5a9a3992be4668bbf3bc0a8',
+                'uploader': 'AfrojackVEVO',
+                'uploader_id': 'AfrojackVEVO',
+                'upload_date': '20131011',
             },
             u"params": {
-                u'youtube_include_dash_manifest': True,
-                u'format': '141',
+                'youtube_include_dash_manifest': True,
+                'format': '141',
             },
         },
     ]
 
-
-    @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
-        if YoutubePlaylistIE.suitable(url): return False
-        return re.match(cls._VALID_URL, url) is not None
-
     def __init__(self, *args, **kwargs):
         super(YoutubeIE, self).__init__(*args, **kwargs)
         self._player_cache = {}
@@ -418,7 +413,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _signature_cache_id(self, example_sig):
         """ Return a string representation of a signature """
-        return u'.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+        return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
@@ -433,26 +428,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         func_id = '%s_%s_%s' % (
             player_type, player_id, self._signature_cache_id(example_sig))
         assert os.path.basename(func_id) == func_id
-        cache_dir = get_cachedir(self._downloader.params)
 
-        cache_enabled = cache_dir is not None
-        if cache_enabled:
-            cache_fn = os.path.join(os.path.expanduser(cache_dir),
-                                    u'youtube-sigfuncs',
-                                    func_id + '.json')
-            try:
-                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
-                    cache_spec = json.load(cachef)
-                return lambda s: u''.join(s[i] for i in cache_spec)
-            except IOError:
-                pass  # No cache available
-            except ValueError:
-                try:
-                    file_size = os.path.getsize(cache_fn)
-                except (OSError, IOError) as oe:
-                    file_size = str(oe)
-                self._downloader.report_warning(
-                    u'Cache %s failed (%s)' % (cache_fn, file_size))
+        cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id)
+        if cache_spec is not None:
+            return lambda s: ''.join(s[i] for i in cache_spec)
 
         if player_type == 'js':
             code = self._download_webpage(
@@ -470,31 +449,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         else:
             assert False, 'Invalid player type %r' % player_type
 
-        if cache_enabled:
-            try:
-                test_string = u''.join(map(compat_chr, range(len(example_sig))))
-                cache_res = res(test_string)
-                cache_spec = [ord(c) for c in cache_res]
-                try:
-                    os.makedirs(os.path.dirname(cache_fn))
-                except OSError as ose:
-                    if ose.errno != errno.EEXIST:
-                        raise
-                write_json_file(cache_spec, cache_fn)
-            except Exception:
-                tb = traceback.format_exc()
-                self._downloader.report_warning(
-                    u'Writing cache to %r failed: %s' % (cache_fn, tb))
+        if cache_spec is None:
+            test_string = ''.join(map(compat_chr, range(len(example_sig))))
+            cache_res = res(test_string)
+            cache_spec = [ord(c) for c in cache_res]
 
+        self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec)
         return res
 
     def _print_sig_code(self, func, example_sig):
         def gen_sig_code(idxs):
             def _genslice(start, end, step):
-                starts = u'' if start == 0 else str(start)
-                ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
-                steps = u'' if step == 1 else (u':%d' % step)
-                return u's[%s%s%s]' % (starts, ends, steps)
+                starts = '' if start == 0 else str(start)
+                ends = (u':%d' % (end+step)) if end + step >= 0 else ':'
+                steps = '' if step == 1 else (u':%d' % step)
+                return 's[%s%s%s]' % (starts, ends, steps)
 
             step = None
             start = '(Never used)'  # Quelch pyflakes warnings - start will be
@@ -511,26 +480,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     start = prev
                     continue
                 else:
-                    yield u's[%d]' % prev
+                    yield 's[%d]' % prev
             if step is None:
-                yield u's[%d]' % i
+                yield 's[%d]' % i
             else:
                 yield _genslice(start, i, step)
 
-        test_string = u''.join(map(compat_chr, range(len(example_sig))))
+        test_string = ''.join(map(compat_chr, range(len(example_sig))))
         cache_res = func(test_string)
         cache_spec = [ord(c) for c in cache_res]
-        expr_code = u' + '.join(gen_sig_code(cache_spec))
+        expr_code = ' + '.join(gen_sig_code(cache_spec))
         signature_id_tuple = '(%s)' % (
             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
         code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
-                u'    return %s\n') % (signature_id_tuple, expr_code)
+                '    return %s\n') % (signature_id_tuple, expr_code)
         self.to_screen(u'Extracted signature function:\n' + code)
 
     def _parse_sig_js(self, jscode):
         funcname = self._search_regex(
             r'signature=([$a-zA-Z]+)', jscode,
-             u'Initial JS player signature function name')
+             'Initial JS player signature function name')
 
         jsi = JSInterpreter(jscode)
         initial_function = jsi.extract_function(funcname)
@@ -538,9 +507,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _parse_sig_swf(self, file_contents):
         swfi = SWFInterpreter(file_contents)
-        TARGET_CLASSNAME = u'SignatureDecipher'
+        TARGET_CLASSNAME = 'SignatureDecipher'
         searched_class = swfi.extract_class(TARGET_CLASSNAME)
-        initial_function = swfi.extract_function(searched_class, u'decipher')
+        initial_function = swfi.extract_function(searched_class, 'decipher')
         return lambda s: initial_function([s])
 
     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
@@ -550,7 +519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             raise ExtractorError(u'Cannot decrypt signature without player_url')
 
         if player_url.startswith(u'//'):
-            player_url = u'https:' + player_url
+            player_url = 'https:' + player_url
         try:
             player_id = (player_url, self._signature_cache_id(s))
             if player_id not in self._player_cache:
@@ -565,7 +534,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         except Exception as e:
             tb = traceback.format_exc()
             raise ExtractorError(
-                u'Signature extraction failed: ' + tb, cause=e)
+                'Signature extraction failed: ' + tb, cause=e)
 
     def _get_available_subtitles(self, video_id, webpage):
         try:
@@ -588,7 +557,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
                 'name': unescapeHTML(l[0]).encode('utf-8'),
             })
-            url = u'https://www.youtube.com/api/timedtext?' + params
+            url = 'https://www.youtube.com/api/timedtext?' + params
             sub_lang_list[lang] = url
         if not sub_lang_list:
             self._downloader.report_warning(u'video doesn\'t have subtitles')
@@ -601,7 +570,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         sub_format = self._downloader.params.get('subtitlesformat', 'srt')
         self.to_screen(u'%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
-        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
+        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
         if mobj is None:
             self._downloader.report_warning(err_msg)
             return {}
@@ -657,7 +626,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             urls = filter(lambda l: l and not l.startswith('#'),
                             lines)
             return urls
-        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
+        manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
         formats_urls = _get_urls(manifest)
         for format_url in formats_urls:
             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
@@ -670,8 +639,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _real_extract(self, url):
         proto = (
-            u'http' if self._downloader.params.get('prefer_insecure', False)
-            else u'https')
+            'http' if self._downloader.params.get('prefer_insecure', False)
+            else 'https')
 
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         mobj = re.search(self._NEXT_URL_RE, url)
@@ -722,11 +691,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         if 'token' not in video_info:
             if 'reason' in video_info:
                 raise ExtractorError(
-                    u'YouTube said: %s' % video_info['reason'][0],
+                    'YouTube said: %s' % video_info['reason'][0],
                     expected=True, video_id=video_id)
             else:
                 raise ExtractorError(
-                    u'"token" parameter not in video info for unknown reason',
+                    '"token" parameter not in video info for unknown reason',
                     video_id=video_id)
 
         if 'view_count' in video_info:
@@ -759,7 +728,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             video_title = video_info['title'][0]
         else:
             self._downloader.report_warning(u'Unable to extract video title')
-            video_title = u'_'
+            video_title = '_'
 
         # thumbnail image
         # We try first to get a high quality image:
@@ -784,7 +753,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
             upload_date = unified_strdate(upload_date)
 
-        m_cat_container = get_element_by_id("eow-category", video_webpage)
+        m_cat_container = self._search_regex(
+            r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
+            video_webpage, 'categories', fatal=False)
         if m_cat_container:
             category = self._html_search_regex(
                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
@@ -811,17 +782,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             if fd_mobj:
                 video_description = unescapeHTML(fd_mobj.group(1))
             else:
-                video_description = u''
+                video_description = ''
 
-        def _extract_count(klass):
+        def _extract_count(count_name):
             count = self._search_regex(
-                r'class="%s">([\d,]+)</span>' % re.escape(klass),
-                video_webpage, klass, default=None)
+                r'id="watch-%s"[^>]*>.*?([\d,]+)\s*</span>' % re.escape(count_name),
+                video_webpage, count_name, default=None)
             if count is not None:
                 return int(count.replace(',', ''))
             return None
-        like_count = _extract_count(u'likes-count')
-        dislike_count = _extract_count(u'dislikes-count')
+        like_count = _extract_count(u'like')
+        dislike_count = _extract_count(u'dislike')
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, video_webpage)
@@ -858,7 +829,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             if m_s is not None:
                 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
-            m_s = re_signature.search(args.get('adaptive_fmts', u''))
+            m_s = re_signature.search(args.get('adaptive_fmts', ''))
             if m_s is not None:
                 if 'adaptive_fmts' in video_info:
                     video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts']
@@ -908,12 +879,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     if not age_gate:
                         jsplayer_url_json = self._search_regex(
                             r'"assets":.+?"js":\s*("[^"]+")',
-                            video_webpage, u'JS player URL')
+                            video_webpage, 'JS player URL')
                         player_url = json.loads(jsplayer_url_json)
                     if player_url is None:
                         player_url_json = self._search_regex(
                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
-                            video_webpage, u'age gate player URL')
+                            video_webpage, 'age gate player URL')
                         player_url = json.loads(player_url_json)
 
                     if self._downloader.params.get('verbose'):
@@ -924,14 +895,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                             if player_url.endswith('swf'):
                                 player_version = self._search_regex(
                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
-                                    u'flash player', fatal=False)
+                                    'flash player', fatal=False)
                                 player_desc = 'flash player %s' % player_version
                             else:
                                 player_version = self._search_regex(
                                     r'html5player-([^/]+?)(?:/html5player)?\.js',
                                     player_url,
                                     'html5 player', fatal=False)
-                                player_desc = u'html5 player %s' % player_version
+                                player_desc = 'html5 player %s' % player_version
 
                         parts_sizes = self._signature_cache_id(encrypted_sig)
                         self.to_screen(u'{%s} signature length %s, %s' %
@@ -1023,7 +994,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         }
 
 class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
-    IE_DESC = u'YouTube.com playlists'
+    IE_DESC = 'YouTube.com playlists'
     _VALID_URL = r"""(?x)(?:
                         (?:https?://)?
                         (?:\w+\.)?
@@ -1045,27 +1016,72 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
     _MORE_PAGES_INDICATOR = r'data-link-type="next"'
     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
-    IE_NAME = u'youtube:playlist'
+    IE_NAME = 'youtube:playlist'
+    _TESTS = [{
+        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+        'info_dict': {
+            'title': 'ytdl test PL',
+        },
+        'playlist_count': 3,
+    }, {
+        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
+        'info_dict': {
+            'title': 'YDL_Empty_List',
+        },
+        'playlist_count': 0,
+    }, {
+        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
+        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+        'info_dict': {
+            'title': '29C3: Not my department',
+        },
+        'playlist_count': 95,
+    }, {
+        'note': 'issue #673',
+        'url': 'PLBB231211A4F62143',
+        'info_dict': {
+            'title': 'Team Fortress 2 (Class-based LP)',
+        },
+        'playlist_mincount': 26,
+    }, {
+        'note': 'Large playlist',
+        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
+        'info_dict': {
+            'title': 'Uploads from Cauchemar',
+        },
+        'playlist_mincount': 799,
+    }, {
+        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+        'info_dict': {
+            'title': 'YDL_safe_search',
+        },
+        'playlist_count': 2,
+    }]
 
     def _real_initialize(self):
         self._login()
 
     def _ids_to_results(self, ids):
-        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
-                       for vid_id in ids]
+        return [
+            self.url_result(vid_id, 'Youtube', video_id=vid_id)
+            for vid_id in ids]
 
     def _extract_mix(self, playlist_id):
         # The mixes are generated from a a single video
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
-        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
+        webpage = self._download_webpage(
+            url, playlist_id, 'Downloading Youtube mix')
         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
-        title_span = (search_title('playlist-title') or
-            search_title('title long-title') or search_title('title'))
+        title_span = (
+            search_title('playlist-title') or
+            search_title('title long-title') or
+            search_title('title'))
         title = clean_html(title_span)
-        video_re = r'''(?x)data-video-username=".*?".*?
-                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
-        ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+        ids = orderedSet(re.findall(
+            r'''(?xs)data-video-username=".*?".*?
+                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
+            webpage))
         url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)
@@ -1092,7 +1108,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
             return self._extract_mix(playlist_id)
         if playlist_id.startswith('TL'):
             raise ExtractorError(u'For downloading YouTube.com top lists, use '
-                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
+                'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
         url = self._TEMPLATE_URL % playlist_id
         page = self._download_webpage(url, playlist_id)
@@ -1101,7 +1117,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         # Check if the playlist exists or is private
         if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
             raise ExtractorError(
-                u'The playlist doesn\'t exist or is private, use --username or '
+                'The playlist doesn\'t exist or is private, use --username or '
                 '--netrc to access it.',
                 expected=True)
 
@@ -1128,17 +1144,18 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
 
         playlist_title = self._html_search_regex(
             r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
-            page, u'title')
+            page, 'title')
 
         url_results = self._ids_to_results(ids)
         return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
 class YoutubeTopListIE(YoutubePlaylistIE):
-    IE_NAME = u'youtube:toplist'
+    IE_NAME = 'youtube:toplist'
     IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
-        u' (Example: "yttoplist:music:Top Tracks")')
+        ' (Example: "yttoplist:music:Top Tracks")')
     _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
+    _TESTS = []
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -1147,7 +1164,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
         query = compat_urllib_parse.urlencode({'title': title})
         playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
         channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
-        link = self._html_search_regex(playlist_re, channel_page, u'list')
+        link = self._html_search_regex(playlist_re, channel_page, 'list')
         url = compat_urlparse.urljoin('https://www.youtube.com/', link)
         
         video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
@@ -1155,9 +1172,10 @@ class YoutubeTopListIE(YoutubePlaylistIE):
         # sometimes the webpage doesn't contain the videos
         # retry until we get them
         for i in itertools.count(0):
-            msg = u'Downloading Youtube mix'
+            msg = 'Downloading Youtube mix'
             if i > 0:
                 msg += ', retry #%d' % i
+
             webpage = self._download_webpage(url, title, msg)
             ids = orderedSet(re.findall(video_re, webpage))
             if ids:
@@ -1167,11 +1185,11 @@ class YoutubeTopListIE(YoutubePlaylistIE):
 
 
 class YoutubeChannelIE(InfoExtractor):
-    IE_DESC = u'YouTube.com channels'
+    IE_DESC = 'YouTube.com channels'
     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
     _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
     _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
-    IE_NAME = u'youtube:channel'
+    IE_NAME = 'youtube:channel'
 
     def extract_videos_from_page(self, page):
         ids_in_page = []
@@ -1223,12 +1241,12 @@ class YoutubeChannelIE(InfoExtractor):
 
 
 class YoutubeUserIE(InfoExtractor):
-    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
+    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
-    IE_NAME = u'youtube:user'
+    IE_NAME = 'youtube:user'
 
     @classmethod
     def suitable(cls, url):
@@ -1257,7 +1275,7 @@ class YoutubeUserIE(InfoExtractor):
             gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
             page = self._download_webpage(
                 gdata_url, username,
-                u'Downloading video ids from %d to %d' % (
+                'Downloading video ids from %d to %d' % (
                     start_index, start_index + self._GDATA_PAGE_SIZE))
 
             try:
@@ -1285,10 +1303,10 @@ class YoutubeUserIE(InfoExtractor):
 
 
 class YoutubeSearchIE(SearchInfoExtractor):
-    IE_DESC = u'YouTube.com searches'
-    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
+    IE_DESC = 'YouTube.com searches'
+    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
     _MAX_RESULTS = 1000
-    IE_NAME = u'youtube:search'
+    IE_NAME = 'youtube:search'
     _SEARCH_KEY = 'ytsearch'
 
     def _get_n_results(self, query, n):
@@ -1312,7 +1330,7 @@ class YoutubeSearchIE(SearchInfoExtractor):
 
             if 'items' not in api_response:
                 raise ExtractorError(
-                    u'[youtube] No video results', expected=True)
+                    '[youtube] No video results', expected=True)
 
             new_ids = list(video['id'] for video in api_response['items'])
             video_ids += new_ids
@@ -1331,12 +1349,12 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
     _SEARCH_KEY = 'ytsearchdate'
-    IE_DESC = u'YouTube.com searches, newest videos first'
+    IE_DESC = 'YouTube.com searches, newest videos first'
 
 
 class YoutubeSearchURLIE(InfoExtractor):
-    IE_DESC = u'YouTube.com search URLs'
-    IE_NAME = u'youtube:search_url'
+    IE_DESC = 'YouTube.com search URLs'
+    IE_NAME = 'youtube:search_url'
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
 
     def _real_extract(self, url):
@@ -1345,7 +1363,7 @@ class YoutubeSearchURLIE(InfoExtractor):
 
         webpage = self._download_webpage(url, query)
         result_code = self._search_regex(
-            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')
+            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')
 
         part_codes = re.findall(
             r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
@@ -1371,14 +1389,14 @@ class YoutubeSearchURLIE(InfoExtractor):
 
 
 class YoutubeShowIE(InfoExtractor):
-    IE_DESC = u'YouTube.com (multi-season) shows'
+    IE_DESC = 'YouTube.com (multi-season) shows'
     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
-    IE_NAME = u'youtube:show'
+    IE_NAME = 'youtube:show'
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         show_name = mobj.group(1)
-        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
+        webpage = self._download_webpage(url, show_name, 'Downloading show webpage')
         # There's one playlist for each season of the show
         m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
         self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
@@ -1404,7 +1422,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
 
     @property
     def IE_NAME(self):
-        return u'youtube:%s' % self._FEED_NAME
+        return 'youtube:%s' % self._FEED_NAME
 
     def _real_initialize(self):
         self._login()
@@ -1414,9 +1432,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
         paging = 0
         for i in itertools.count(1):
             info = self._download_json(self._FEED_TEMPLATE % paging,
-                                          u'%s feed' % self._FEED_NAME,
-                                          u'Downloading page %s' % i)
+                                          '%s feed' % self._FEED_NAME,
+                                          'Downloading page %s' % i)
             feed_html = info.get('feed_html') or info.get('content_html')
+            load_more_widget_html = info.get('load_more_widget_html') or feed_html
             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
             ids = orderedSet(m.group(1) for m in m_ids)
             feed_entries.extend(
@@ -1424,50 +1443,82 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
                 for video_id in ids)
             mobj = re.search(
                 r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
-                feed_html)
+                load_more_widget_html)
             if mobj is None:
                 break
             paging = mobj.group('paging')
         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
 
-class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
-    _FEED_NAME = 'subscriptions'
-    _PLAYLIST_TITLE = u'Youtube Subscriptions'
-
 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
+    IE_DESC = 'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
     _FEED_NAME = 'recommended'
-    _PLAYLIST_TITLE = u'Youtube Recommended videos'
+    _PLAYLIST_TITLE = 'Youtube Recommended videos'
 
 class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
+    IE_DESC = 'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
     _FEED_NAME = 'watch_later'
-    _PLAYLIST_TITLE = u'Youtube Watch Later'
+    _PLAYLIST_TITLE = 'Youtube Watch Later'
     _PERSONAL_FEED = True
 
 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
-    _VALID_URL = u'https?://www\.youtube\.com/feed/history|:ythistory'
+    IE_DESC = 'Youtube watch history, "ythistory" keyword (requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
     _FEED_NAME = 'history'
     _PERSONAL_FEED = True
-    _PLAYLIST_TITLE = u'Youtube Watch History'
+    _PLAYLIST_TITLE = 'Youtube Watch History'
 
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
-    IE_NAME = u'youtube:favorites'
-    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
+    IE_NAME = 'youtube:favorites'
+    IE_DESC = 'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
     _LOGIN_REQUIRED = True
 
     def _real_extract(self, url):
         webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
-        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
+        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
         return self.url_result(playlist_id, 'YoutubePlaylist')
 
 
+class YoutubeSubscriptionsIE(YoutubePlaylistIE):
+    IE_NAME = 'youtube:subscriptions'
+    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+    _TESTS = []
+
+    def _real_extract(self, url):
+        title = 'Youtube Subscriptions'
+        page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)
+
+        # The extraction process is the same as for playlists, but the regex
+        # for the video ids doesn't contain an index
+        ids = []
+        more_widget_html = content_html = page
+
+        for page_num in itertools.count(1):
+            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
+            new_ids = orderedSet(matches)
+            ids.extend(new_ids)
+
+            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+            if not mobj:
+                break
+
+            more = self._download_json(
+                'https://youtube.com/%s' % mobj.group('more'), title,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
+            content_html = more['content_html']
+            more_widget_html = more['load_more_widget_html']
+
+        return {
+            '_type': 'playlist',
+            'title': title,
+            'entries': self._ids_to_results(ids),
+        }
+
+
 class YoutubeTruncatedURLIE(InfoExtractor):
     IE_NAME = 'youtube:truncated_url'
     IE_DESC = False  # Do not list
@@ -1489,9 +1540,9 @@ class YoutubeTruncatedURLIE(InfoExtractor):
 
     def _real_extract(self, url):
         raise ExtractorError(
-            u'Did you forget to quote the URL? Remember that & is a meta '
-            u'character in most shells, so you want to put the URL in quotes, '
-            u'like  youtube-dl '
-            u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
-            u' or simply  youtube-dl BaW_jenozKc  .',
+            'Did you forget to quote the URL? Remember that & is a meta '
+            'character in most shells, so you want to put the URL in quotes, '
+            'like  youtube-dl '
+            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
+            ' or simply  youtube-dl BaW_jenozKc  .',
             expected=True)
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
new file mode 100644 (file)
index 0000000..31baab4
--- /dev/null
@@ -0,0 +1,481 @@
+from __future__ import unicode_literals
+
+import os.path
+import optparse
+import shlex
+import sys
+
+from .utils import (
+    get_term_width,
+    write_string,
+)
+from .version import __version__
+
+
+def parseOpts(overrideArguments=None):
+    def _readOptions(filename_bytes, default=[]):
+        try:
+            optionf = open(filename_bytes)
+        except IOError:
+            return default  # silently skip if file is not present
+        try:
+            res = []
+            for l in optionf:
+                res += shlex.split(l, comments=True)
+        finally:
+            optionf.close()
+        return res
+
+    def _readUserConf():
+        xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
+        if xdg_config_home:
+            userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
+            if not os.path.isfile(userConfFile):
+                userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
+        else:
+            userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
+            if not os.path.isfile(userConfFile):
+                userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
+        userConf = _readOptions(userConfFile, None)
+
+        if userConf is None:
+            appdata_dir = os.environ.get('appdata')
+            if appdata_dir:
+                userConf = _readOptions(
+                    os.path.join(appdata_dir, 'youtube-dl', 'config'),
+                    default=None)
+                if userConf is None:
+                    userConf = _readOptions(
+                        os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
+                        default=None)
+
+        if userConf is None:
+            userConf = _readOptions(
+                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
+                default=None)
+        if userConf is None:
+            userConf = _readOptions(
+                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
+                default=None)
+
+        if userConf is None:
+            userConf = []
+
+        return userConf
+
+    def _format_option_string(option):
+        ''' ('-o', '--option') -> -o, --option METAVAR'''
+
+        opts = []
+
+        if option._short_opts:
+            opts.append(option._short_opts[0])
+        if option._long_opts:
+            opts.append(option._long_opts[0])
+        if len(opts) > 1:
+            opts.insert(1, ', ')
+
+        if option.takes_value(): opts.append(' %s' % option.metavar)
+
+        return "".join(opts)
+
+    def _comma_separated_values_options_callback(option, opt_str, value, parser):
+        setattr(parser.values, option.dest, value.split(','))
+
+    def _hide_login_info(opts):
+        opts = list(opts)
+        for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
+            try:
+                i = opts.index(private_opt)
+                opts[i+1] = '<PRIVATE>'
+            except ValueError:
+                pass
+        return opts
+
+    max_width = 80
+    max_help_position = 80
+
+    # No need to wrap help messages if we're on a wide console
+    columns = get_term_width()
+    if columns: max_width = columns
+
+    fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
+    fmt.format_option_strings = _format_option_string
+
+    kw = {
+        'version'   : __version__,
+        'formatter' : fmt,
+        'usage' : '%prog [options] url [url...]',
+        'conflict_handler' : 'resolve',
+    }
+
+    parser = optparse.OptionParser(**kw)
+
+    # option groups
+    general        = optparse.OptionGroup(parser, 'General Options')
+    selection      = optparse.OptionGroup(parser, 'Video Selection')
+    authentication = optparse.OptionGroup(parser, 'Authentication Options')
+    video_format   = optparse.OptionGroup(parser, 'Video Format Options')
+    subtitles      = optparse.OptionGroup(parser, 'Subtitle Options')
+    downloader     = optparse.OptionGroup(parser, 'Download Options')
+    postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
+    filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
+    workarounds    = optparse.OptionGroup(parser, 'Workarounds')
+    verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
+
+    general.add_option('-h', '--help',
+            action='help', help='print this help text and exit')
+    general.add_option('-v', '--version',
+            action='version', help='print program version and exit')
+    general.add_option('-U', '--update',
+            action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
+    general.add_option('-i', '--ignore-errors',
+            action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
+    general.add_option('--abort-on-error',
+            action='store_false', dest='ignoreerrors',
+            help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
+    general.add_option('--dump-user-agent',
+            action='store_true', dest='dump_user_agent',
+            help='display the current browser identification', default=False)
+    general.add_option('--list-extractors',
+            action='store_true', dest='list_extractors',
+            help='List all supported extractors and the URLs they would handle', default=False)
+    general.add_option('--extractor-descriptions',
+            action='store_true', dest='list_extractor_descriptions',
+            help='Output descriptions of all supported extractors', default=False)
+    general.add_option(
+        '--proxy', dest='proxy', default=None, metavar='URL',
+        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
+    general.add_option(
+        '--socket-timeout', dest='socket_timeout',
+        type=float, default=None, help='Time to wait before giving up, in seconds')
+    general.add_option(
+        '--default-search',
+        dest='default_search', metavar='PREFIX',
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
+    general.add_option(
+        '--ignore-config',
+        action='store_true',
+        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
+
+    selection.add_option(
+        '--playlist-start',
+        dest='playliststart', metavar='NUMBER', default=1, type=int,
+        help='playlist video to start at (default is %default)')
+    selection.add_option(
+        '--playlist-end',
+        dest='playlistend', metavar='NUMBER', default=None, type=int,
+        help='playlist video to end at (default is last)')
+    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
+    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
+    selection.add_option('--max-downloads', metavar='NUMBER',
+                         dest='max_downloads', type=int, default=None,
+                         help='Abort after downloading NUMBER files')
+    selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
+    selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
+    selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
+    selection.add_option(
+        '--datebefore', metavar='DATE', dest='datebefore', default=None,
+        help='download only videos uploaded on or before this date (i.e. inclusive)')
+    selection.add_option(
+        '--dateafter', metavar='DATE', dest='dateafter', default=None,
+        help='download only videos uploaded on or after this date (i.e. inclusive)')
+    selection.add_option(
+        '--min-views', metavar='COUNT', dest='min_views',
+        default=None, type=int,
+        help="Do not download any videos with less than COUNT views",)
+    selection.add_option(
+        '--max-views', metavar='COUNT', dest='max_views',
+        default=None, type=int,
+        help="Do not download any videos with more than COUNT views",)
+    selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
+    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
+                         help='download only videos suitable for the given age',
+                         default=None, type=int)
+    selection.add_option('--download-archive', metavar='FILE',
+                         dest='download_archive',
+                         help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
+    selection.add_option(
+        '--include-ads', dest='include_ads',
+        action='store_true',
+        help='Download advertisements as well (experimental)')
+    selection.add_option(
+        '--youtube-include-dash-manifest', action='store_true',
+        dest='youtube_include_dash_manifest', default=False,
+        help='Try to download the DASH manifest on YouTube videos (experimental)')
+
+    authentication.add_option('-u', '--username',
+            dest='username', metavar='USERNAME', help='account username')
+    authentication.add_option('-p', '--password',
+            dest='password', metavar='PASSWORD', help='account password')
+    authentication.add_option('-2', '--twofactor',
+            dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code')
+    authentication.add_option('-n', '--netrc',
+            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
+    authentication.add_option('--video-password',
+            dest='videopassword', metavar='PASSWORD', help='video password (vimeo, smotri)')
+
+
+    video_format.add_option('-f', '--format',
+            action='store', dest='format', metavar='FORMAT', default=None,
+            help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.')
+    video_format.add_option('--all-formats',
+            action='store_const', dest='format', help='download all available video formats', const='all')
+    video_format.add_option('--prefer-free-formats',
+            action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
+    video_format.add_option('--max-quality',
+            action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
+    video_format.add_option('-F', '--list-formats',
+            action='store_true', dest='listformats', help='list all available formats')
+
+    subtitles.add_option('--write-sub', '--write-srt',
+            action='store_true', dest='writesubtitles',
+            help='write subtitle file', default=False)
+    subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
+            action='store_true', dest='writeautomaticsub',
+            help='write automatic subtitle file (youtube only)', default=False)
+    subtitles.add_option('--all-subs',
+            action='store_true', dest='allsubtitles',
+            help='downloads all the available subtitles of the video', default=False)
+    subtitles.add_option('--list-subs',
+            action='store_true', dest='listsubtitles',
+            help='lists all available subtitles for the video', default=False)
+    subtitles.add_option('--sub-format',
+            action='store', dest='subtitlesformat', metavar='FORMAT',
+            help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
+    subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
+            action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
+            default=[], callback=_comma_separated_values_options_callback,
+            help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
+
+    downloader.add_option('-r', '--rate-limit',
+            dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
+    downloader.add_option('-R', '--retries',
+            dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
+    downloader.add_option('--buffer-size',
+            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024")
+    downloader.add_option('--no-resize-buffer',
+            action='store_true', dest='noresizebuffer',
+            help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
+    downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
+
+    workarounds.add_option(
+        '--encoding', dest='encoding', metavar='ENCODING',
+        help='Force the specified encoding (experimental)')
+    workarounds.add_option(
+        '--no-check-certificate', action='store_true',
+        dest='no_check_certificate', default=False,
+        help='Suppress HTTPS certificate validation.')
+    workarounds.add_option(
+        '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
+        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
+    workarounds.add_option(
+        '--user-agent', metavar='UA',
+        dest='user_agent', help='specify a custom user agent')
+    workarounds.add_option(
+        '--referer', metavar='REF',
+        dest='referer', default=None,
+        help='specify a custom referer, use if the video access is restricted to one domain',
+    )
+    workarounds.add_option(
+        '--add-header', metavar='FIELD:VALUE',
+        dest='headers', action='append',
+        help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+    )
+    workarounds.add_option(
+        '--bidi-workaround', dest='bidi_workaround', action='store_true',
+        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
+
+    verbosity.add_option('-q', '--quiet',
+            action='store_true', dest='quiet', help='activates quiet mode', default=False)
+    verbosity.add_option(
+        '--no-warnings',
+        dest='no_warnings', action='store_true', default=False,
+        help='Ignore warnings')
+    verbosity.add_option('-s', '--simulate',
+            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
+    verbosity.add_option('--skip-download',
+            action='store_true', dest='skip_download', help='do not download the video', default=False)
+    verbosity.add_option('-g', '--get-url',
+            action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
+    verbosity.add_option('-e', '--get-title',
+            action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
+    verbosity.add_option('--get-id',
+            action='store_true', dest='getid', help='simulate, quiet but print id', default=False)
+    verbosity.add_option('--get-thumbnail',
+            action='store_true', dest='getthumbnail',
+            help='simulate, quiet but print thumbnail URL', default=False)
+    verbosity.add_option('--get-description',
+            action='store_true', dest='getdescription',
+            help='simulate, quiet but print video description', default=False)
+    verbosity.add_option('--get-duration',
+            action='store_true', dest='getduration',
+            help='simulate, quiet but print video length', default=False)
+    verbosity.add_option('--get-filename',
+            action='store_true', dest='getfilename',
+            help='simulate, quiet but print output filename', default=False)
+    verbosity.add_option('--get-format',
+            action='store_true', dest='getformat',
+            help='simulate, quiet but print output format', default=False)
+    verbosity.add_option('-j', '--dump-json',
+            action='store_true', dest='dumpjson',
+            help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False)
+    verbosity.add_option('--newline',
+            action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
+    verbosity.add_option('--no-progress',
+            action='store_true', dest='noprogress', help='do not print progress bar', default=False)
+    verbosity.add_option('--console-title',
+            action='store_true', dest='consoletitle',
+            help='display progress in console titlebar', default=False)
+    verbosity.add_option('-v', '--verbose',
+            action='store_true', dest='verbose', help='print various debugging information', default=False)
+    verbosity.add_option('--dump-intermediate-pages',
+            action='store_true', dest='dump_intermediate_pages', default=False,
+            help='print downloaded pages to debug problems (very verbose)')
+    verbosity.add_option('--write-pages',
+            action='store_true', dest='write_pages', default=False,
+            help='Write downloaded intermediary pages to files in the current directory to debug problems')
+    verbosity.add_option('--youtube-print-sig-code',
+            action='store_true', dest='youtube_print_sig_code', default=False,
+            help=optparse.SUPPRESS_HELP)
+    verbosity.add_option('--print-traffic',
+            dest='debug_printtraffic', action='store_true', default=False,
+            help='Display sent and read HTTP traffic')
+
+
+    filesystem.add_option('-a', '--batch-file',
+            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
+    filesystem.add_option('--id',
+            action='store_true', dest='useid', help='use only video ID in file name', default=False)
+    filesystem.add_option('-A', '--auto-number',
+            action='store_true', dest='autonumber',
+            help='number downloaded files starting from 00000', default=False)
+    filesystem.add_option('-o', '--output',
+            dest='outtmpl', metavar='TEMPLATE',
+            help=('output filename template. Use %(title)s to get the title, '
+                  '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
+                  '%(autonumber)s to get an automatically incremented number, '
+                  '%(ext)s for the filename extension, '
+                  '%(format)s for the format description (like "22 - 1280x720" or "HD"), '
+                  '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
+                  '%(upload_date)s for the upload date (YYYYMMDD), '
+                  '%(extractor)s for the provider (youtube, metacafe, etc), '
+                  '%(id)s for the video id, %(playlist)s for the playlist the video is in, '
+                  '%(playlist_index)s for the position in the playlist and %% for a literal percent. '
+                  '%(height)s and %(width)s for the height and width of the video format. '
+                  '%(resolution)s for a textual description of the resolution of the video format. '
+                  'Use - to output to stdout. Can also be used to download to a different directory, '
+                  'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
+    filesystem.add_option('--autonumber-size',
+            dest='autonumber_size', metavar='NUMBER',
+            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
+    filesystem.add_option('--restrict-filenames',
+            action='store_true', dest='restrictfilenames',
+            help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
+    filesystem.add_option('-t', '--title',
+            action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False)
+    filesystem.add_option('-l', '--literal',
+            action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
+    filesystem.add_option('-w', '--no-overwrites',
+            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
+    filesystem.add_option('-c', '--continue',
+            action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
+    filesystem.add_option('--no-continue',
+            action='store_false', dest='continue_dl',
+            help='do not resume partially downloaded files (restart from beginning)')
+    filesystem.add_option('--no-part',
+            action='store_true', dest='nopart', help='do not use .part files', default=False)
+    filesystem.add_option('--no-mtime',
+            action='store_false', dest='updatetime',
+            help='do not use the Last-modified header to set the file modification time', default=True)
+    filesystem.add_option('--write-description',
+            action='store_true', dest='writedescription',
+            help='write video description to a .description file', default=False)
+    filesystem.add_option('--write-info-json',
+            action='store_true', dest='writeinfojson',
+            help='write video metadata to a .info.json file', default=False)
+    filesystem.add_option('--write-annotations',
+            action='store_true', dest='writeannotations',
+            help='write video annotations to a .annotation file', default=False)
+    filesystem.add_option('--write-thumbnail',
+            action='store_true', dest='writethumbnail',
+            help='write thumbnail image to disk', default=False)
+    filesystem.add_option('--load-info',
+            dest='load_info_filename', metavar='FILE',
+            help='json file containing the video information (created with the "--write-info-json" option)')
+    filesystem.add_option('--cookies',
+            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
+    filesystem.add_option(
+        '--cache-dir', dest='cachedir', default=None, metavar='DIR',
+        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
+    filesystem.add_option(
+        '--no-cache-dir', action='store_const', const=False, dest='cachedir',
+        help='Disable filesystem caching')
+    filesystem.add_option(
+        '--rm-cache-dir', action='store_true', dest='rm_cachedir',
+        help='Delete all filesystem cache files')
+
+
+    postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
+            help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
+    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
+            help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default')
+    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
+            help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
+    postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None,
+            help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)')
+    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
+            help='keeps the video file on disk after the post-processing; the video is erased by default')
+    postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
+            help='do not overwrite post-processed files; the post-processed files are overwritten by default')
+    postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
+            help='embed subtitles in the video (only for mp4 videos)')
+    postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
+            help='embed thumbnail in the audio as cover art')
+    postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
+            help='write metadata to the video file')
+    postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
+            help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+    postproc.add_option('--prefer-avconv', action='store_false', dest='prefer_ffmpeg',
+        help='Prefer avconv over ffmpeg for running the postprocessors (default)')
+    postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg',
+        help='Prefer ffmpeg over avconv for running the postprocessors')
+    postproc.add_option(
+        '--exec', metavar='CMD', dest='exec_cmd',
+        help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )
+
+    parser.add_option_group(general)
+    parser.add_option_group(selection)
+    parser.add_option_group(downloader)
+    parser.add_option_group(filesystem)
+    parser.add_option_group(verbosity)
+    parser.add_option_group(workarounds)
+    parser.add_option_group(video_format)
+    parser.add_option_group(subtitles)
+    parser.add_option_group(authentication)
+    parser.add_option_group(postproc)
+
+    if overrideArguments is not None:
+        opts, args = parser.parse_args(overrideArguments)
+        if opts.verbose:
+            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
+    else:
+        commandLineConf = sys.argv[1:]
+        if '--ignore-config' in commandLineConf:
+            systemConf = []
+            userConf = []
+        else:
+            systemConf = _readOptions('/etc/youtube-dl.conf')
+            if '--ignore-config' in systemConf:
+                userConf = []
+            else:
+                userConf = _readUserConf()
+        argv = systemConf + userConf + commandLineConf
+
+        opts, args = parser.parse_args(argv)
+        if opts.verbose:
+            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+
+    return parser, opts, args
index 16bc7408a74a535fa55a013866a096bec387d564..b644f4e920bf0353658ec9920abdb0541dbaf0e2 100644 (file)
@@ -280,6 +280,11 @@ if sys.version_info >= (2, 7):
         return node.find(expr)
 else:
     def find_xpath_attr(node, xpath, key, val):
+        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
+        # .//node does not match if a node is a direct child of . !
+        if isinstance(xpath, unicode):
+            xpath = xpath.encode('ascii')
+
         for f in node.findall(xpath):
             if f.attrib.get(key) == val:
                 return f
@@ -298,30 +303,20 @@ def xpath_with_ns(path, ns_map):
             replaced.append('{%s}%s' % (ns_map[ns], tag))
     return '/'.join(replaced)
 
-def htmlentity_transform(matchobj):
-    """Transforms an HTML entity to a character.
 
-    This function receives a match object and is intended to be used with
-    the re.sub() function.
-    """
-    entity = matchobj.group(1)
+def xpath_text(node, xpath, name=None, fatal=False):
+    if sys.version_info < (2, 7):  # Crazy 2.6
+        xpath = xpath.encode('ascii')
 
-    # Known non-numeric HTML entity
-    if entity in compat_html_entities.name2codepoint:
-        return compat_chr(compat_html_entities.name2codepoint[entity])
-
-    mobj = re.match(u'(?u)#(x?\\d+)', entity)
-    if mobj is not None:
-        numstr = mobj.group(1)
-        if numstr.startswith(u'x'):
-            base = 16
-            numstr = u'0%s' % numstr
+    n = node.find(xpath)
+    if n is None:
+        if fatal:
+            name = xpath if name is None else name
+            raise ExtractorError('Could not find XML element %s' % name)
         else:
-            base = 10
-        return compat_chr(int(numstr, base))
+            return None
+    return n.text
 
-    # Unknown entity in name, return its literal representation
-    return (u'&%s;' % entity)
 
 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
 class BaseHTMLParser(compat_html_parser.HTMLParser):
@@ -543,13 +538,33 @@ def orderedSet(iterable):
     return res
 
 
+def _htmlentity_transform(entity):
+    """Transforms an HTML entity to a character."""
+    # Known non-numeric HTML entity
+    if entity in compat_html_entities.name2codepoint:
+        return compat_chr(compat_html_entities.name2codepoint[entity])
+
+    mobj = re.match(r'#(x?[0-9]+)', entity)
+    if mobj is not None:
+        numstr = mobj.group(1)
+        if numstr.startswith(u'x'):
+            base = 16
+            numstr = u'0%s' % numstr
+        else:
+            base = 10
+        return compat_chr(int(numstr, base))
+
+    # Unknown entity in name, return its literal representation
+    return (u'&%s;' % entity)
+
+
 def unescapeHTML(s):
     if s is None:
         return None
     assert type(s) == compat_str
 
-    result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
-    return result
+    return re.sub(
+        r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
 
 
 def encodeFilename(s, for_subprocess=False):
@@ -621,7 +636,7 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
                     self.sock = sock
                     self._tunnel()
                 try:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
+                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
                 except ssl.SSLError:
                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
 
@@ -629,8 +644,14 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
             def https_open(self, req):
                 return self.do_open(HTTPSConnectionV3, req)
         return HTTPSHandlerV3(**kwargs)
-    else:
-        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
+    elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
+        context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
+        context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
+        if opts_no_check_certificate:
+            context.verify_mode = ssl.CERT_NONE
+        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+    else:  # Python < 3.4
+        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
         context.verify_mode = (ssl.CERT_NONE
                                if opts_no_check_certificate
                                else ssl.CERT_REQUIRED)
@@ -766,10 +787,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
         return ret
 
     def http_request(self, req):
-        for h,v in std_headers.items():
-            if h in req.headers:
-                del req.headers[h]
-            req.add_header(h, v)
+        for h, v in std_headers.items():
+            if h not in req.headers:
+                req.add_header(h, v)
         if 'Youtubedl-no-compression' in req.headers:
             if 'Accept-encoding' in req.headers:
                 del req.headers['Accept-encoding']
@@ -1081,12 +1101,6 @@ def intlist_to_bytes(xs):
         return bytes(xs)
 
 
-def get_cachedir(params={}):
-    cache_root = os.environ.get('XDG_CACHE_HOME',
-                                os.path.expanduser('~/.cache'))
-    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
-
-
 # Cross-platform file locking
 if sys.platform == 'win32':
     import ctypes.wintypes
@@ -1146,10 +1160,10 @@ else:
     import fcntl
 
     def _lock_file(f, exclusive):
-        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
 
     def _unlock_file(f):
-        fcntl.lockf(f, fcntl.LOCK_UN)
+        fcntl.flock(f, fcntl.LOCK_UN)
 
 
 class locked_file(object):
@@ -1323,9 +1337,10 @@ def str_or_none(v, default=None):
 
 
 def str_to_int(int_str):
+    """ A more relaxed version of int_or_none """
     if int_str is None:
         return None
-    int_str = re.sub(r'[,\.]', u'', int_str)
+    int_str = re.sub(r'[,\.\+]', u'', int_str)
     return int(int_str)
 
 
@@ -1337,8 +1352,10 @@ def parse_duration(s):
     if s is None:
         return None
 
+    s = s.strip()
+
     m = re.match(
-        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?(?P<ms>\.[0-9]+)?$', s)
+        r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
     if not m:
         return None
     res = int(m.group('secs'))
@@ -1420,6 +1437,24 @@ def uppercase_escape(s):
         lambda m: unicode_escape(m.group(0))[0],
         s)
 
+
+def escape_rfc3986(s):
+    """Escape non-ASCII characters as suggested by RFC 3986"""
+    if sys.version_info < (3, 0) and isinstance(s, unicode):
+        s = s.encode('utf-8')
+    return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]")
+
+
+def escape_url(url):
+    """Escape URL as suggested by RFC 3986"""
+    url_parsed = compat_urllib_parse_urlparse(url)
+    return url_parsed._replace(
+        path=escape_rfc3986(url_parsed.path),
+        params=escape_rfc3986(url_parsed.params),
+        query=escape_rfc3986(url_parsed.query),
+        fragment=escape_rfc3986(url_parsed.fragment)
+    ).geturl()
+
 try:
     struct.pack(u'!I', 0)
 except TypeError:
@@ -1554,3 +1589,13 @@ except AttributeError:
         if ret:
             raise subprocess.CalledProcessError(ret, p.args, output=output)
         return output
+
+
+def limit_length(s, length):
+    """ Add ellipses to overly long strings """
+    if s is None:
+        return None
+    ELLIPSES = '...'
+    if len(s) > length:
+        return s[:length - len(ELLIPSES)] + ELLIPSES
+    return s
index 7939e48e9a833d81e73cc0e68f97f929acff9564..cf0d862da60105fe1f76efc2fecf7106e2fdeb03 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.08.25.3'
+__version__ = '2014.09.15.1'