Merge remote-tracking branch 'xavierbeynon/master'
authorPhilipp Hagemeister <phihag@phihag.de>
Sat, 10 Jan 2015 01:03:46 +0000 (02:03 +0100)
committerPhilipp Hagemeister <phihag@phihag.de>
Sat, 10 Jan 2015 01:03:46 +0000 (02:03 +0100)
75 files changed:
.gitignore
.travis.yml
AUTHORS
CONTRIBUTING.md
Makefile
README.md
devscripts/gh-pages/update-sites.py
devscripts/make_supportedsites.py
test/helper.py
test/test_InfoExtractor.py
test/test_YoutubeDL.py
test/test_age_restriction.py
test/test_subtitles.py
test/test_utils.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/downloader/f4m.py
youtube_dl/downloader/hls.py
youtube_dl/downloader/mplayer.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/auengine.py
youtube_dl/extractor/bbccouk.py
youtube_dl/extractor/bet.py
youtube_dl/extractor/bilibili.py
youtube_dl/extractor/buzzfeed.py
youtube_dl/extractor/canalplus.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/common.py
youtube_dl/extractor/commonmistakes.py [new file with mode: 0644]
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/discovery.py
youtube_dl/extractor/ellentv.py
youtube_dl/extractor/elpais.py
youtube_dl/extractor/fktv.py
youtube_dl/extractor/gameone.py
youtube_dl/extractor/gdcvault.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/giga.py [new file with mode: 0644]
youtube_dl/extractor/huffpost.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/khanacademy.py
youtube_dl/extractor/kontrtube.py
youtube_dl/extractor/lrt.py
youtube_dl/extractor/mit.py
youtube_dl/extractor/motorsport.py
youtube_dl/extractor/netzkino.py [new file with mode: 0644]
youtube_dl/extractor/normalboots.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/played.py
youtube_dl/extractor/radiobremen.py [new file with mode: 0644]
youtube_dl/extractor/rtlnl.py
youtube_dl/extractor/rutube.py
youtube_dl/extractor/sexykarma.py
youtube_dl/extractor/soulanime.py [new file with mode: 0644]
youtube_dl/extractor/teachertube.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/tf1.py
youtube_dl/extractor/tudou.py
youtube_dl/extractor/tunein.py
youtube_dl/extractor/vier.py [new file with mode: 0644]
youtube_dl/extractor/viki.py
youtube_dl/extractor/vimple.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/washingtonpost.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/webofstories.py [new file with mode: 0644]
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/xtube.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zdf.py
youtube_dl/options.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/update.py
youtube_dl/utils.py
youtube_dl/version.py

index 86312d4e4185cd6dbd56627a3f2ce9f8ef5e9a43..0422adf4456ec35166f5d2bce6b832c67601c2dc 100644 (file)
@@ -31,3 +31,5 @@ updates_key.pem
 test/testdata
 .tox
 youtube-dl.zsh
+.idea
+.idea/*
\ No newline at end of file
index c6cc7a994be31732c563612d3560ba1cdf6379da..f140144149920ed025f36101a413165516be17ec 100644 (file)
@@ -9,7 +9,6 @@ notifications:
   email:
     - filippo.valsorda@gmail.com
     - phihag@phihag.de
-    - jaime.marquinez.ferrandiz+travis@gmail.com
     - yasoob.khld@gmail.com
 #  irc:
 #    channels:
diff --git a/AUTHORS b/AUTHORS
index 29ce9e3e4792d45674da49e1a585ed231ea2a3a8..8f201080305d80473b85572e1429d480a9530874 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -97,3 +97,7 @@ Petr Kutalek
 Will Glynn
 Max Reimann
 Cédric Luthi
+Thijs Vermeir
+Joel Leclerc
+Christopher Krooss
+Ondřej Caletka
index 0ff7b395a9f71e2aa2a9e1f20417d34e1f0fbcf6..7917abfc6df2480eb779f22ab336dd82c10f3d13 100644 (file)
@@ -44,7 +44,7 @@ In particular, every site support request issue should only pertain to services
 
 ###  Is anyone going to need the feature?
 
-Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
 
 ###  Is your question about youtube-dl?
 
index 71470eedbaccbdeb0a6a5ab198ae3a07b2e48643..5780798793cf2915807d28791a8cb59d40e9dcf2 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -46,7 +46,7 @@ test:
 ot: offlinetest
 
 offlinetest: codetest
-       nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations
+       nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
 
 tar: youtube-dl.tar.gz
 
@@ -63,7 +63,7 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
        chmod a+x youtube-dl
 
 README.md: youtube_dl/*.py youtube_dl/*/*.py
-       COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py
+       COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py
 
 CONTRIBUTING.md: README.md
        python devscripts/make_contributing.py README.md CONTRIBUTING.md
index 915bcd0cd2469c70d20ebd469d9814298c5cc2c8..3d632a5532f218484382814c15fb86795f69daf3 100644 (file)
--- a/README.md
+++ b/README.md
@@ -219,6 +219,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      for each command-line argument. If the URL
                                      refers to a playlist, dump the whole
                                      playlist information in a single line.
+    --print-json                     Be quiet and print the video information as
+                                     JSON (video is still being downloaded).
     --newline                        output progress bar as new lines
     --no-progress                    do not print progress bar
     --console-title                  display progress in console titlebar
@@ -248,14 +250,15 @@ which means you can modify it, redistribute it or use it however you like.
 
 ## Video Format Options:
     -f, --format FORMAT              video format code, specify the order of
-                                     preference using slashes: -f 22/17/18 .  -f
-                                     mp4 , -f m4a and  -f flv  are also
-                                     supported. You can also use the special
-                                     names "best", "bestvideo", "bestaudio",
-                                     "worst", "worstvideo" and "worstaudio". By
-                                     default, youtube-dl will pick the best
-                                     quality. Use commas to download multiple
-                                     audio formats, such as -f
+                                     preference using slashes, as in -f 22/17/18
+                                     .  Instead of format codes, you can select
+                                     by extension for the extensions aac, m4a,
+                                     mp3, mp4, ogg, wav, webm. You can also use
+                                     the special names "best", "bestvideo",
+                                     "bestaudio", "worst".  By default, youtube-
+                                     dl will pick the best quality. Use commas
+                                     to download multiple audio formats, such as
+                                     -f
                                      136/137/mp4/bestvideo,140/m4a/bestaudio.
                                      You can merge the video and audio of two
                                      formats into a single file using -f <video-
@@ -326,7 +329,7 @@ which means you can modify it, redistribute it or use it however you like.
 
 # CONFIGURATION
 
-You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
+You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
 
 # OUTPUT TEMPLATE
 
@@ -449,6 +452,14 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
 
 To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
 
+### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
+
+If you put youtube-dl and ffmpeg in the same directory that you're running the command from, it will work, but that's rather cumbersome.
+
+To make a different directory work - either for ffmpeg, or for youtube-dl, or for both - simply create the directory (say, `C:\bin`, or `C:\Users\<User name>\bin`), put all the executables directly in there, and then [set your PATH environment variable](https://www.java.com/en/download/help/path.xml) to include that directory.
+
+From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
+
 ### How can I detect whether a given URL is supported by youtube-dl?
 
 For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
index f0f0481c781ab40f8386de5c87a99c6468e00708..d3ef5f0b50daa56513118f55d5b636e5f46552a0 100755 (executable)
@@ -16,7 +16,7 @@ def main():
         template = tmplf.read()
 
     ie_htmls = []
-    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
+    for ie in youtube_dl.list_extractors(age_limit=None):
         ie_html = '<b>{}</b>'.format(ie.IE_NAME)
         ie_desc = getattr(ie, 'IE_DESC', None)
         if ie_desc is False:
index 14001064400f37b14feeb9a0fee6a898ef36ef4a..3df4385a6b09a520791f67f8f3958c0b635df1bf 100644 (file)
@@ -23,12 +23,12 @@ def main():
 
     def gen_ies_md(ies):
         for ie in ies:
-            ie_md = '**{}**'.format(ie.IE_NAME)
+            ie_md = '**{0}**'.format(ie.IE_NAME)
             ie_desc = getattr(ie, 'IE_DESC', None)
             if ie_desc is False:
                 continue
             if ie_desc is not None:
-                ie_md += ': {}'.format(ie.IE_DESC)
+                ie_md += ': {0}'.format(ie.IE_DESC)
             if not ie.working():
                 ie_md += ' (Currently broken)'
             yield ie_md
index 96d58b7c12fd9119b3b5f65eb9c41cfc3c97f500..c416f388cbfe335678269b0226cc10708c49a850 100644 (file)
@@ -82,18 +82,8 @@ class FakeYDL(YoutubeDL):
 
 def gettestcases(include_onlymatching=False):
     for ie in youtube_dl.extractor.gen_extractors():
-        t = getattr(ie, '_TEST', None)
-        if t:
-            assert not hasattr(ie, '_TESTS'), \
-                '%s has _TEST and _TESTS' % type(ie).__name__
-            tests = [t]
-        else:
-            tests = getattr(ie, '_TESTS', [])
-        for t in tests:
-            if not include_onlymatching and t.get('only_matching', False):
-                continue
-            t['name'] = type(ie).__name__[:-len('IE')]
-            yield t
+        for tc in ie.get_testcases(include_onlymatching):
+            yield tc
 
 
 md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
@@ -120,6 +110,20 @@ def expect_info_dict(self, got_dict, expected_dict):
         else:
             if isinstance(expected, compat_str) and expected.startswith('md5:'):
                 got = 'md5:' + md5(got_dict.get(info_field))
+            elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
+                got = got_dict.get(info_field)
+                self.assertTrue(
+                    isinstance(got, list),
+                    'Expected field %s to be a list, but it is of type %s' % (
+                        info_field, type(got).__name__))
+                expected_num = int(expected.partition(':')[2])
+                assertGreaterEqual(
+                    self, len(got), expected_num,
+                    'Expected %d items in field %s, but only got %d' % (
+                        expected_num, info_field, len(got)
+                    )
+                )
+                continue
             else:
                 got = got_dict.get(info_field)
             self.assertEqual(expected, got,
index 13c18ed95d4ea65111b6a5bc1406d0a5703336c2..be8d12997a1a5aba2cb62270068363f339a5eac6 100644 (file)
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
 
+    def test_html_search_meta(self):
+        ie = self.ie
+        html = '''
+            <meta name="a" content="1" />
+            <meta name='b' content='2'>
+            <meta name="c" content='3'>
+            <meta name=d content='4'>
+            <meta property="e" content='5' >
+            <meta content="6" name="f">
+        '''
+
+        self.assertEqual(ie._html_search_meta('a', html), '1')
+        self.assertEqual(ie._html_search_meta('b', html), '2')
+        self.assertEqual(ie._html_search_meta('c', html), '3')
+        self.assertEqual(ie._html_search_meta('d', html), '4')
+        self.assertEqual(ie._html_search_meta('e', html), '5')
+        self.assertEqual(ie._html_search_meta('f', html), '6')
+
 if __name__ == '__main__':
     unittest.main()
index f8e4f930ebe6d5aefdbf128909b1be720ec046f3..85d87f2c31e803aff668f1d71a6bbdfba33cdcd8 100644 (file)
@@ -8,6 +8,8 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+import copy
+
 from test.helper import FakeYDL, assertRegexpMatches
 from youtube_dl import YoutubeDL
 from youtube_dl.extractor import YoutubeIE
@@ -192,6 +194,37 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'vid-high')
 
+    def test_format_selection_audio_exts(self):
+        formats = [
+            {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+            {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+        ]
+
+        info_dict = _make_result(formats)
+        ydl = YDL({'format': 'best'})
+        ie = YoutubeIE(ydl)
+        ie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(copy.deepcopy(info_dict))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'aac-64')
+
+        ydl = YDL({'format': 'mp3'})
+        ie = YoutubeIE(ydl)
+        ie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(copy.deepcopy(info_dict))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'mp3-64')
+
+        ydl = YDL({'prefer_free_formats': True})
+        ie = YoutubeIE(ydl)
+        ie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(copy.deepcopy(info_dict))
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'ogg-64')
+
     def test_format_selection_video(self):
         formats = [
             {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
@@ -218,7 +251,7 @@ class TestFormatSelection(unittest.TestCase):
             # 3D
             '85', '84', '102', '83', '101', '82', '100',
             # Dash video
-            '138', '137', '248', '136', '247', '135', '246',
+            '137', '248', '136', '247', '135', '246',
             '245', '244', '134', '243', '133', '242', '160',
             # Dash audio
             '141', '172', '140', '171', '139',
index 5be065c437ae978d8c4ccc1a67bea87a599a77ed..6f5513faa2c5551ce3f3c9d2967a55b28adad858 100644 (file)
@@ -45,11 +45,6 @@ class TestAgeRestriction(unittest.TestCase):
             'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
             '505835.mp4', 2, old_age=25)
 
-    def test_pornotube(self):
-        self._assert_restricted(
-            'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
-            '1689755.flv', 13)
-
 
 if __name__ == '__main__':
     unittest.main()
index d345651918f04aeeae5c6e38a023e17a9414a974..6336dd317ca5a77ebced2e55d3c49873b58ebda6 100644 (file)
@@ -17,6 +17,7 @@ from youtube_dl.extractor import (
     TEDIE,
     VimeoIE,
     WallaIE,
+    CeskaTelevizeIE,
 )
 
 
@@ -317,5 +318,32 @@ class TestWallaSubtitles(BaseTestSubtitles):
         self.assertEqual(len(subtitles), 0)
 
 
+class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
+    url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
+    IE = CeskaTelevizeIE
+
+    def test_list_subtitles(self):
+        self.DL.expect_warning('Automatic Captions not supported by this server')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
+        self.assertEqual(info_dict, None)
+
+    def test_allsubtitles(self):
+        self.DL.expect_warning('Automatic Captions not supported by this server')
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['cs']))
+        self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
+
+    def test_nosubtitles(self):
+        self.DL.expect_warning('video doesn\'t have subtitles')
+        self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
+
+
 if __name__ == '__main__':
     unittest.main()
index dd49a6d179dc7cea01935be7c019292b4f3559dc..16e1a1ddfdef5be7c0ba941d8793b3c8b88faf08 100644 (file)
@@ -16,6 +16,7 @@ import json
 import xml.etree.ElementTree
 
 from youtube_dl.utils import (
+    age_restricted,
     args_to_str,
     clean_html,
     DateRange,
@@ -402,5 +403,12 @@ Trying to open render node...
 Success at /dev/dri/renderD128.
 ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
 
+    def test_age_restricted(self):
+        self.assertFalse(age_restricted(None, 10))  # unrestricted content
+        self.assertFalse(age_restricted(1, None))  # unrestricted policy
+        self.assertFalse(age_restricted(8, 10))
+        self.assertTrue(age_restricted(18, 14))
+        self.assertFalse(age_restricted(18, 18))
+
 if __name__ == '__main__':
     unittest.main()
index e2b823f667f71196f375a8c4c56f061403ade27f..61675d8ec8d7e6c2b7a3bb143dfcb3355bd1fe0a 100755 (executable)
@@ -63,6 +63,7 @@ from .utils import (
     YoutubeDLHandler,
     prepend_extension,
     args_to_str,
+    age_restricted,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
@@ -202,6 +203,7 @@ class YoutubeDL(object):
 
                        Progress hooks are guaranteed to be called at least once
                        (with status "finished") if the download is successful.
+    merge_output_format: Extension to use when merging formats.
 
 
     The following parameters are not used by YoutubeDL itself, they are used by
@@ -550,13 +552,8 @@ class YoutubeDL(object):
             max_views = self.params.get('max_views')
             if max_views is not None and view_count > max_views:
                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
-        age_limit = self.params.get('age_limit')
-        if age_limit is not None:
-            actual_age_limit = info_dict.get('age_limit')
-            if actual_age_limit is None:
-                actual_age_limit = 0
-            if age_limit < actual_age_limit:
-                return 'Skipping "' + title + '" because it is age restricted'
+        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+            return 'Skipping "%s" because it is age restricted' % title
         if self.in_download_archive(info_dict):
             return '%s has already been recorded in archive' % video_title
         return None
@@ -790,7 +787,7 @@ class YoutubeDL(object):
             if video_formats:
                 return video_formats[0]
         else:
-            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
+            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
             if format_spec in extensions:
                 filter_f = lambda f: f['ext'] == format_spec
             else:
@@ -913,10 +910,23 @@ class YoutubeDL(object):
                                                   'contain the video, try using '
                                                   '"-f %s+%s"' % (format_2, format_1))
                                 return
+                            output_ext = (
+                                formats_info[0]['ext']
+                                if self.params.get('merge_output_format') is None
+                                else self.params['merge_output_format'])
                             selected_format = {
                                 'requested_formats': formats_info,
                                 'format': rf,
                                 'ext': formats_info[0]['ext'],
+                                'width': formats_info[0].get('width'),
+                                'height': formats_info[0].get('height'),
+                                'resolution': formats_info[0].get('resolution'),
+                                'fps': formats_info[0].get('fps'),
+                                'vcodec': formats_info[0].get('vcodec'),
+                                'vbr': formats_info[0].get('vbr'),
+                                'acodec': formats_info[1].get('acodec'),
+                                'abr': formats_info[1].get('abr'),
+                                'ext': output_ext,
                             }
                         else:
                             selected_format = None
@@ -1333,7 +1343,9 @@ class YoutubeDL(object):
         formats = info_dict.get('formats', [info_dict])
         idlen = max(len('format code'),
                     max(len(f['format_id']) for f in formats))
-        formats_s = [line(f, idlen) for f in formats]
+        formats_s = [
+            line(f, idlen) for f in formats
+            if f.get('preference') is None or f['preference'] >= -1000]
         if len(formats) > 1:
             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
index e7932032326c4217e64698f047f99231c92a40ff..8e7b7446663644bde08fa991c0dfcb67dfba674f 100644 (file)
@@ -38,7 +38,7 @@ from .update import update_self
 from .downloader import (
     FileDownloader,
 )
-from .extractor import gen_extractors
+from .extractor import gen_extractors, list_extractors
 from .YoutubeDL import YoutubeDL
 
 
@@ -95,24 +95,22 @@ def _real_main(argv=None):
     _enc = preferredencoding()
     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
 
-    extractors = gen_extractors()
-
     if opts.list_extractors:
-        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+        for ie in list_extractors(opts.age_limit):
             compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
             matchedUrls = [url for url in all_urls if ie.suitable(url)]
             for mu in matchedUrls:
                 compat_print('  ' + mu)
         sys.exit(0)
     if opts.list_extractor_descriptions:
-        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+        for ie in list_extractors(opts.age_limit):
             if not ie._WORKING:
                 continue
             desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
             if desc is False:
                 continue
             if hasattr(ie, 'SEARCH_KEY'):
-                _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny')
+                _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
                 _COUNTS = ('', '5', '10', 'all')
                 desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
             compat_print(desc)
@@ -168,6 +166,7 @@ def _real_main(argv=None):
     if opts.recodevideo is not None:
         if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
             parser.error('invalid video recode format specified')
+
     if opts.date is not None:
         date = DateRange.day(opts.date)
     else:
@@ -199,7 +198,8 @@ def _real_main(argv=None):
                      ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
                      ' template'.format(outtmpl))
 
-    any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+    any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+    any_printing = opts.print_json
     download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
 
     # PostProcessors
@@ -245,7 +245,7 @@ def _real_main(argv=None):
         'password': opts.password,
         'twofactor': opts.twofactor,
         'videopassword': opts.videopassword,
-        'quiet': (opts.quiet or any_printing),
+        'quiet': (opts.quiet or any_getting or any_printing),
         'no_warnings': opts.no_warnings,
         'forceurl': opts.geturl,
         'forcetitle': opts.gettitle,
@@ -255,9 +255,9 @@ def _real_main(argv=None):
         'forceduration': opts.getduration,
         'forcefilename': opts.getfilename,
         'forceformat': opts.getformat,
-        'forcejson': opts.dumpjson,
+        'forcejson': opts.dumpjson or opts.print_json,
         'dump_single_json': opts.dump_single_json,
-        'simulate': opts.simulate or any_printing,
+        'simulate': opts.simulate or any_getting,
         'skip_download': opts.skip_download,
         'format': opts.format,
         'format_limit': opts.format_limit,
@@ -324,6 +324,7 @@ def _real_main(argv=None):
         'encoding': opts.encoding,
         'exec_cmd': opts.exec_cmd,
         'extract_flat': opts.extract_flat,
+        'merge_output_format': opts.merge_output_format,
         'postprocessors': postprocessors,
     }
 
@@ -365,3 +366,5 @@ def main(argv=None):
         sys.exit('ERROR: fixed output name but more than one file to download')
     except KeyboardInterrupt:
         sys.exit('\nERROR: Interrupted by user')
+
+__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
index f9f6f3e734c9411be705667bd70ef10620008dc1..c460c167a2db78be7a1cdb8e81a3efe89e677ed1 100644 (file)
@@ -187,24 +187,34 @@ def build_fragments_list(boot_info):
     return res
 
 
-def write_flv_header(stream, metadata):
-    """Writes the FLV header and the metadata to stream"""
+def write_unsigned_int(stream, val):
+    stream.write(struct_pack('!I', val))
+
+
+def write_unsigned_int_24(stream, val):
+    stream.write(struct_pack('!I', val)[1:])
+
+
+def write_flv_header(stream):
+    """Writes the FLV header to stream"""
     # FLV header
     stream.write(b'FLV\x01')
     stream.write(b'\x05')
     stream.write(b'\x00\x00\x00\x09')
-    # FLV File body
     stream.write(b'\x00\x00\x00\x00')
-    # FLVTAG
-    # Script data
-    stream.write(b'\x12')
-    # Size of the metadata with 3 bytes
-    stream.write(struct_pack('!L', len(metadata))[1:])
-    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
-    stream.write(metadata)
-    # Magic numbers extracted from the output files produced by AdobeHDS.php
-    # (https://github.com/K-S-V/Scripts)
-    stream.write(b'\x00\x00\x01\x73')
+
+
+def write_metadata_tag(stream, metadata):
+    """Writes optional metadata tag to stream"""
+    SCRIPT_TAG = b'\x12'
+    FLV_TAG_HEADER_LEN = 11
+
+    if metadata:
+        stream.write(SCRIPT_TAG)
+        write_unsigned_int_24(stream, len(metadata))
+        stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
+        stream.write(metadata)
+        write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
 
 
 def _add_ns(prop):
@@ -256,7 +266,11 @@ class F4mFD(FileDownloader):
             bootstrap = self.ydl.urlopen(bootstrap_url).read()
         else:
             bootstrap = base64.b64decode(bootstrap_node.text)
-        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
+        metadata_node = media.find(_add_ns('metadata'))
+        if metadata_node is not None:
+            metadata = base64.b64decode(metadata_node.text)
+        else:
+            metadata = None
         boot_info = read_bootstrap_info(bootstrap)
 
         fragments_list = build_fragments_list(boot_info)
@@ -269,7 +283,8 @@ class F4mFD(FileDownloader):
 
         tmpfilename = self.temp_name(filename)
         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
-        write_flv_header(dest_stream, metadata)
+        write_flv_header(dest_stream)
+        write_metadata_tag(dest_stream, metadata)
 
         # This dict stores the download progress, it's updated by the progress
         # hook
index 5bb0f3cfd19632f126d4f7d7b0df407b608778fc..aa58b52abb5998ba8879e6eba3a1d974484467e2 100644 (file)
@@ -11,7 +11,6 @@ from ..compat import (
     compat_urllib_request,
 )
 from ..utils import (
-    check_executable,
     encodeFilename,
 )
 
@@ -27,16 +26,13 @@ class HlsFD(FileDownloader):
             '-bsf:a', 'aac_adtstoasc',
             encodeFilename(tmpfilename, for_subprocess=True)]
 
-        for program in ['avconv', 'ffmpeg']:
-            if check_executable(program, ['-version']):
-                break
-        else:
+        ffpp = FFmpegPostProcessor(downloader=self)
+        program = ffpp._executable
+        if program is None:
             self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
             return False
-        cmd = [program] + args
-
-        ffpp = FFmpegPostProcessor(downloader=self)
         ffpp.check_version()
+        cmd = [program] + args
 
         retval = subprocess.call(cmd)
         if retval == 0:
index c53195da0c9471d55a61b53b1041e05ee209697e..72cef30eaf3718ad8932814a627042cc0bdff361 100644 (file)
@@ -4,8 +4,8 @@ import os
 import subprocess
 
 from .common import FileDownloader
-from ..compat import compat_subprocess_get_DEVNULL
 from ..utils import (
+    check_executable,
     encodeFilename,
 )
 
@@ -20,11 +20,7 @@ class MplayerFD(FileDownloader):
             'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
             '-dumpstream', '-dumpfile', tmpfilename, url]
         # Check for mplayer first
-        try:
-            subprocess.call(
-                ['mplayer', '-h'],
-                stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
-        except (OSError, IOError):
+        if not check_executable('mplayer', ['-h']):
             self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
             return False
 
index e4c51f238f210704e5aaf220349cc2fe6d145f36..f544e87f1222eae4afd300309ed1f342eae78aa3 100644 (file)
@@ -71,6 +71,7 @@ from .cnn import (
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .comcarcoff import ComCarCoffIE
+from .commonmistakes import CommonMistakesIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
@@ -158,6 +159,7 @@ from .gametrailers import GametrailersIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
 from .giantbomb import GiantBombIE
+from .giga import GigaIE
 from .glide import GlideIE
 from .globo import GloboIE
 from .godtube import GodTubeIE
@@ -272,6 +274,7 @@ from .nbc import (
 )
 from .ndr import NDRIE
 from .ndtv import NDTVIE
+from .netzkino import NetzkinoIE
 from .nerdcubed import NerdCubedFeedIE
 from .newgrounds import NewgroundsIE
 from .newstube import NewstubeIE
@@ -324,6 +327,7 @@ from .prosiebensat1 import ProSiebenSat1IE
 from .pyvideo import PyvideoIE
 from .quickvid import QuickVidIE
 from .radiode import RadioDeIE
+from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
 from .rai import RaiIE
 from .rbmaradio import RBMARadioIE
@@ -344,6 +348,7 @@ from .ruhd import RUHDIE
 from .rutube import (
     RutubeIE,
     RutubeChannelIE,
+    RutubeEmbedIE,
     RutubeMovieIE,
     RutubePersonIE,
 )
@@ -473,6 +478,7 @@ from .videott import VideoTtIE
 from .videoweed import VideoWeedIE
 from .vidme import VidmeIE
 from .vidzi import VidziIE
+from .vier import VierIE, VierVideosIE
 from .vimeo import (
     VimeoIE,
     VimeoAlbumIE,
@@ -508,6 +514,7 @@ from .wdr import (
     WDRMobileIE,
     WDRMausIE,
 )
+from .webofstories import WebOfStoriesIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
@@ -543,7 +550,7 @@ from .youtube import (
     YoutubeSearchURLIE,
     YoutubeShowIE,
     YoutubeSubscriptionsIE,
-    YoutubeTopListIE,
+    YoutubeTruncatedIDIE,
     YoutubeTruncatedURLIE,
     YoutubeUserIE,
     YoutubeWatchLaterIE,
@@ -569,6 +576,17 @@ def gen_extractors():
     return [klass() for klass in _ALL_CLASSES]
 
 
+def list_extractors(age_limit):
+    """
+    Return a list of extractors that are suitable for the given age,
+    sorted by extractor ID.
+    """
+
+    return sorted(
+        filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
+        key=lambda ie: ie.IE_NAME.lower())
+
+
 def get_info_extractor(ie_name):
     """Returns the info extractor class with the given ie_name"""
     return globals()[ie_name + 'IE']
index 014a219522d5de5ab85415cb8aeca0a93561a409..a1b666be0a4ce1610cfad79f5393ec23a9427bc8 100644 (file)
@@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse
 from ..utils import (
     determine_ext,
     ExtractorError,
+    remove_end,
 )
 
 
@@ -27,23 +28,18 @@ class AUEngineIE(InfoExtractor):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
-        title = title.strip()
-        links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
-        links = map(compat_urllib_parse.unquote, links)
-
-        thumbnail = None
-        video_url = None
-        for link in links:
-            if link.endswith('.png'):
-                thumbnail = link
-            elif '/videos/' in link:
-                video_url = link
+        title = self._html_search_regex(
+            r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title')
+        video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage)
+        video_url = compat_urllib_parse.unquote(video_urls[0])
+        thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage)
+        thumbnail = compat_urllib_parse.unquote(thumbnails[0])
+
         if not video_url:
             raise ExtractorError('Could not find video URL')
+
         ext = '.' + determine_ext(video_url)
-        if ext == title[-len(ext):]:
-            title = title[:-len(ext)]
+        title = remove_end(title, ext)
 
         return {
             'id': video_id,
index f690dc803c9b818342a311c02b414d9729943baf..1cf48fe0dd739b328478899a83f0d8aba94e6c4a 100644 (file)
@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
 class BBCCoUkIE(SubtitlesInfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
+    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
 
     _TESTS = [
         {
@@ -18,8 +18,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             'info_dict': {
                 'id': 'b039d07m',
                 'ext': 'flv',
-                'title': 'Kaleidoscope: Leonard Cohen',
-                'description': 'md5:db4755d7a665ae72343779f7dacb402c',
+                'title': 'Kaleidoscope, Leonard Cohen',
+                'description': 'The Canadian poet and songwriter reflects on his musical career.',
                 'duration': 1740,
             },
             'params': {
@@ -84,6 +84,40 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
                 # rtmp download
                 'skip_download': True,
             }
+        }, {
+            'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
+            'note': 'Audio',
+            'info_dict': {
+                'id': 'p02frcch',
+                'ext': 'flv',
+                'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
+                'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
+                'duration': 3507,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            }
+        }, {
+            'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
+            'note': 'Video',
+            'info_dict': {
+                'id': 'p025c103',
+                'ext': 'flv',
+                'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
+                'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
+                'duration': 226,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            }
+        }, {
+            'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
+            'only_matching': True,
+        }, {
+            'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
+            'only_matching': True,
         }
     ]
 
@@ -241,8 +275,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
 
         # fallback to legacy playlist
         playlist = self._download_xml(
-                'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
-                playlist_id, 'Downloading legacy playlist XML')
+            'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
+            playlist_id, 'Downloading legacy playlist XML')
 
         no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
         if no_items is not None:
index 003e50002c76e403e6a60defaafcf7595dd40052..d2abd4d772c95e9877a607af7cc0b6e4d56e123a 100644 (file)
@@ -16,7 +16,7 @@ class BetIE(InfoExtractor):
         {
             'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
             'info_dict': {
-                'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
+                'id': '740ab250-bb94-4a8a-8787-fe0de7c74471',
                 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
                 'ext': 'flv',
                 'title': 'BET News Presents: A Conversation With President Obama',
@@ -35,7 +35,7 @@ class BetIE(InfoExtractor):
         {
             'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
             'info_dict': {
-                'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
+                'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d',
                 'display_id': 'justice-for-ferguson-a-community-reacts',
                 'ext': 'flv',
                 'title': 'Justice for Ferguson: A Community Reacts',
@@ -55,7 +55,6 @@ class BetIE(InfoExtractor):
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
-
         webpage = self._download_webpage(url, display_id)
 
         media_url = compat_urllib_parse.unquote(self._search_regex(
index 241b904a9e57f7cc3e61b6f086550578feb71b05..75d744852edc382721cee8556067f89ccb0092df 100644 (file)
@@ -4,9 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_parse_qs
 from ..utils import (
-    ExtractorError,
     int_or_none,
     unified_strdate,
 )
@@ -54,45 +52,38 @@ class BiliBiliIE(InfoExtractor):
         thumbnail = self._html_search_meta(
             'thumbnailUrl', video_code, 'thumbnail', fatal=False)
 
-        player_params = compat_parse_qs(self._html_search_regex(
-            r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"',
-            webpage, 'player params'))
+        cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
 
-        if 'cid' in player_params:
-            cid = player_params['cid'][0]
+        lq_doc = self._download_xml(
+            'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
+            video_id,
+            note='Downloading LQ video info'
+        )
+        lq_durl = lq_doc.find('./durl')
+        formats = [{
+            'format_id': 'lq',
+            'quality': 1,
+            'url': lq_durl.find('./url').text,
+            'filesize': int_or_none(
+                lq_durl.find('./size'), get_attr='text'),
+        }]
 
-            lq_doc = self._download_xml(
-                'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
-                video_id,
-                note='Downloading LQ video info'
-            )
-            lq_durl = lq_doc.find('.//durl')
-            formats = [{
-                'format_id': 'lq',
-                'quality': 1,
-                'url': lq_durl.find('./url').text,
+        hq_doc = self._download_xml(
+            'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
+            video_id,
+            note='Downloading HQ video info',
+            fatal=False,
+        )
+        if hq_doc is not False:
+            hq_durl = hq_doc.find('./durl')
+            formats.append({
+                'format_id': 'hq',
+                'quality': 2,
+                'ext': 'flv',
+                'url': hq_durl.find('./url').text,
                 'filesize': int_or_none(
-                    lq_durl.find('./size'), get_attr='text'),
-            }]
-
-            hq_doc = self._download_xml(
-                'http://interface.bilibili.cn/playurl?cid=%s' % cid,
-                video_id,
-                note='Downloading HQ video info',
-                fatal=False,
-            )
-            if hq_doc is not False:
-                hq_durl = hq_doc.find('.//durl')
-                formats.append({
-                    'format_id': 'hq',
-                    'quality': 2,
-                    'ext': 'flv',
-                    'url': hq_durl.find('./url').text,
-                    'filesize': int_or_none(
-                        hq_durl.find('./size'), get_attr='text'),
-                })
-        else:
-            raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
+                    hq_durl.find('./size'), get_attr='text'),
+            })
 
         self._sort_formats(formats)
         return {
index a40a1bbc47b38529b7a5191bb0a6aeb96c15c532..a5d2af1749f188a086e5384b1de6a2441e624902 100644 (file)
@@ -33,7 +33,7 @@ class BuzzFeedIE(InfoExtractor):
             'skip_download': True,  # Got enough YouTube download tests
         },
         'info_dict': {
-            'description': 'Munchkin the Teddy Bear is back !',
+            'description': 're:Munchkin the Teddy Bear is back ?!',
             'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
         },
         'playlist': [{
@@ -42,9 +42,9 @@ class BuzzFeedIE(InfoExtractor):
                 'ext': 'mp4',
                 'upload_date': '20141124',
                 'uploader_id': 'CindysMunchkin',
-                'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
+                'description': 're:© 2014 Munchkin the Shih Tzu',
                 'uploader': 'Munchkin the Shih Tzu',
-                'title': 'Munchkin the Teddy Bear gets her exercise',
+                'title': 're:Munchkin the Teddy Bear gets her exercise',
             },
         }]
     }]
index 9873728df6f3bb1adbfddc1959aa5e7e70241f5b..11d18d74ace31a513a7c06be40baf6ce89c858b3 100644 (file)
@@ -5,6 +5,8 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
+    HEADRequest,
     unified_strdate,
     url_basename,
     qualities,
@@ -76,6 +78,16 @@ class CanalplusIE(InfoExtractor):
 
         preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS'])
 
+        fmt_url = next(iter(media.find('VIDEOS'))).text
+        if '/geo' in fmt_url.lower():
+            response = self._request_webpage(
+                HEADRequest(fmt_url), video_id,
+                'Checking if the video is georestricted')
+            if '/blocage' in response.geturl():
+                raise ExtractorError(
+                    'The video is not available in your country',
+                    expected=True)
+
         formats = []
         for fmt in media.find('VIDEOS'):
             format_url = fmt.text
index 2f866f3ef925c8402f00a3c0f922cf530eaa2010..f70e090bb5b01942713149493e48bc0e51f7f74b 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
@@ -11,49 +11,42 @@ from ..compat import (
 )
 from ..utils import (
     ExtractorError,
+    float_or_none,
 )
 
 
-class CeskaTelevizeIE(InfoExtractor):
+class CeskaTelevizeIE(SubtitlesInfoExtractor):
     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
 
     _TESTS = [
         {
-            'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
+            'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
             'info_dict': {
-                'id': '213512120230004',
-                'ext': 'flv',
-                'title': 'První republika: Španělská chřipka',
-                'duration': 3107.4,
+                'id': '214411058091220',
+                'ext': 'mp4',
+                'title': 'Hyde Park Civilizace',
+                'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'duration': 3350,
             },
             'params': {
-                'skip_download': True,  # requires rtmpdump
+                # m3u8 download
+                'skip_download': True,
             },
-            'skip': 'Works only from Czech Republic.',
-        },
-        {
-            'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
-            'info_dict': {
-                'id': '20138143440',
-                'ext': 'flv',
-                'title': 'Tsatsiki, maminka a policajt',
-                'duration': 6754.1,
-            },
-            'params': {
-                'skip_download': True,  # requires rtmpdump
-            },
-            'skip': 'Works only from Czech Republic.',
         },
         {
             'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
             'info_dict': {
                 'id': '14716',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'První republika: Zpěvačka z Dupárny Bobina',
-                'duration': 90,
+                'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'duration': 88.4,
             },
             'params': {
-                'skip_download': True,  # requires rtmpdump
+                # m3u8 download
+                'skip_download': True,
             },
         },
     ]
@@ -80,8 +73,9 @@ class CeskaTelevizeIE(InfoExtractor):
             'requestSource': 'iVysilani',
         }
 
-        req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
-                                            data=compat_urllib_parse.urlencode(data))
+        req = compat_urllib_request.Request(
+            'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
+            data=compat_urllib_parse.urlencode(data))
 
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
         req.add_header('x-addr', '127.0.0.1')
@@ -90,39 +84,72 @@ class CeskaTelevizeIE(InfoExtractor):
 
         playlistpage = self._download_json(req, video_id)
 
-        req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
+        playlist_url = playlistpage['url']
+        if playlist_url == 'error_region':
+            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+
+        req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
         req.add_header('Referer', url)
 
-        playlist = self._download_xml(req, video_id)
+        playlist = self._download_json(req, video_id)
 
+        item = playlist['playlist'][0]
         formats = []
-        for i in playlist.find('smilRoot/body'):
-            if 'AD' not in i.attrib['id']:
-                base_url = i.attrib['base']
-                parsedurl = compat_urllib_parse_urlparse(base_url)
-                duration = i.attrib['duration']
-
-                for video in i.findall('video'):
-                    if video.attrib['label'] != 'AD':
-                        format_id = video.attrib['label']
-                        play_path = video.attrib['src']
-                        vbr = int(video.attrib['system-bitrate'])
-
-                        formats.append({
-                            'format_id': format_id,
-                            'url': base_url,
-                            'vbr': vbr,
-                            'play_path': play_path,
-                            'app': parsedurl.path[1:] + '?' + parsedurl.query,
-                            'rtmp_live': True,
-                            'ext': 'flv',
-                        })
-
+        for format_id, stream_url in item['streamUrls'].items():
+            formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
         self._sort_formats(formats)
 
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        duration = float_or_none(item.get('duration'))
+        thumbnail = item.get('previewImageUrl')
+
+        subtitles = {}
+        subs = item.get('subtitles')
+        if subs:
+            subtitles['cs'] = subs[0]['url']
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
+        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
+
         return {
             'id': episode_id,
-            'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
-            'duration': float(duration),
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
             'formats': formats,
+            'subtitles': subtitles,
         }
+
+    @staticmethod
+    def _fix_subtitles(subtitles):
+        """ Convert millisecond-based subtitles to SRT """
+        if subtitles is None:
+            return subtitles  # subtitles not requested
+
+        def _msectotimecode(msec):
+            """ Helper utility to convert milliseconds to timecode """
+            components = []
+            for divider in [1000, 60, 60, 100]:
+                components.append(msec % divider)
+                msec //= divider
+            return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)
+
+        def _fix_subtitle(subtitle):
+            for line in subtitle.splitlines():
+                m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line)
+                if m:
+                    yield m.group(1)
+                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
+                    yield "{0} --> {1}".format(start, stop)
+                else:
+                    yield line
+
+        fixed_subtitles = {}
+        for k, v in subtitles.items():
+            fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
+        return fixed_subtitles
index 6e264f6870811b31b1582cfd022d51799a18968a..b4cd59e4318a52019e060250499b1d50d1e01a8b 100644 (file)
@@ -21,6 +21,7 @@ from ..compat import (
     compat_str,
 )
 from ..utils import (
+    age_restricted,
     clean_html,
     compiled_regex_type,
     ExtractorError,
@@ -92,6 +93,8 @@ class InfoExtractor(object):
                                  by this field, regardless of all other values.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
+                                 < -1000 to hide the format (if there is
+                                    another one which is strictly better)
                     * language_preference  Is this in the correct requested
                                  language?
                                  10 if it's what the URL is about,
@@ -144,6 +147,17 @@ class InfoExtractor(object):
     like_count:     Number of positive ratings of the video
     dislike_count:  Number of negative ratings of the video
     comment_count:  Number of comments on the video
+    comments:       A list of comments, each with one or more of the following
+                    properties (all but one of text or html optional):
+                        * "author" - human-readable name of the comment author
+                        * "author_id" - user ID of the comment author
+                        * "id" - Comment ID
+                        * "html" - Comment as HTML
+                        * "text" - Plain text of the comment
+                        * "timestamp" - UNIX timestamp of comment
+                        * "parent" - ID of the comment this one is replying to.
+                                     Set to "root" to indicate that this is a
+                                     comment to the original video.
     age_limit:      Age restriction for the video, as an integer (years)
     webpage_url:    The url to the video webpage, if given to youtube-dl it
                     should allow to get the same result again. (It will be set
@@ -591,7 +605,7 @@ class InfoExtractor(object):
         return self._html_search_regex(
             r'''(?isx)<meta
                     (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
-                    [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
             html, display_name, fatal=fatal, group='content', **kwargs)
 
     def _dc_search_uploader(self, html):
@@ -875,6 +889,35 @@ class InfoExtractor(object):
             None, '/', True, False, expire_time, '', None, None, None)
         self._downloader.cookiejar.set_cookie(cookie)
 
+    def get_testcases(self, include_onlymatching=False):
+        t = getattr(self, '_TEST', None)
+        if t:
+            assert not hasattr(self, '_TESTS'), \
+                '%s has _TEST and _TESTS' % type(self).__name__
+            tests = [t]
+        else:
+            tests = getattr(self, '_TESTS', [])
+        for t in tests:
+            if not include_onlymatching and t.get('only_matching', False):
+                continue
+            t['name'] = type(self).__name__[:-len('IE')]
+            yield t
+
+    def is_suitable(self, age_limit):
+        """ Test whether the extractor is generally suitable for the given
+        age limit (i.e. pornographic sites are not, all others usually are) """
+
+        any_restricted = False
+        for tc in self.get_testcases(include_onlymatching=False):
+            if 'playlist' in tc:
+                tc = tc['playlist'][0]
+            is_restricted = age_restricted(
+                tc.get('info_dict', {}).get('age_limit'), age_limit)
+            if not is_restricted:
+                return True
+            any_restricted = any_restricted or is_restricted
+        return not any_restricted
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py
new file mode 100644 (file)
index 0000000..75c0690
--- /dev/null
@@ -0,0 +1,29 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class CommonMistakesIE(InfoExtractor):
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'''(?x)
+        (?:url|URL)
+    '''
+
+    _TESTS = [{
+        'url': 'url',
+        'only_matching': True,
+    }, {
+        'url': 'URL',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        msg = (
+            'You\'ve asked youtube-dl to download the URL "%s". '
+            'That doesn\'t make any sense. '
+            'Simply remove the parameter in your command or configuration.'
+        ) % url
+        if self._downloader.params.get('verbose'):
+            msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
+        raise ExtractorError(msg, expected=True)
index 354046a9e1428bac87f5d9709d16d43308c84555..1680f532f80167a65c2dbdc3b5bc0bfa83f7fc66 100644 (file)
@@ -228,7 +228,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
 
         formats = []
-        for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
+        for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
             stream_quality, stream_format = self._FORMAT_IDS[fmt]
             video_format = fmt + 'p'
             streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
index 52c2d7ddf99873779b7f3223b0acfe4563e2b5d9..d3e6675283cddcb8f6a6dfffbfbd1e1ea3da11bc 100644 (file)
@@ -1,47 +1,45 @@
 from __future__ import unicode_literals
 
-import re
-import json
-
 from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    int_or_none,
+)
 
 
 class DiscoveryIE(InfoExtractor):
-    _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
+    _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
     _TEST = {
         'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
-        'md5': 'e12614f9ee303a6ccef415cb0793eba2',
+        'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
         'info_dict': {
-            'id': '614784',
-            'ext': 'mp4',
-            'title': 'MythBusters: Mission Impossible Outtakes',
+            'id': 'mission-impossible-outtakes',
+            'ext': 'flv',
+            'title': 'Mission Impossible Outtakes',
             'description': ('Watch Jamie Hyneman and Adam Savage practice being'
                             ' each other -- to the point of confusing Jamie\'s dog -- and '
                             'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
                             ' back.'),
             'duration': 156,
+            'timestamp': 1303099200,
+            'upload_date': '20110418',
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
-                                             webpage, 'video list', flags=re.DOTALL)
-        video_list = json.loads(video_list_json)
-        info = video_list['clips'][0]
-        formats = []
-        for f in info['mp4']:
-            formats.append(
-                {'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})
+        info = self._parse_json(self._search_regex(
+            r'(?s)<script type="application/ld\+json">(.*?)</script>',
+            webpage, 'video info'), video_id)
 
         return {
-            'id': info['contentId'],
-            'title': video_list['name'],
-            'formats': formats,
-            'description': info['videoCaption'],
-            'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'),
-            'duration': info['duration'],
+            'id': video_id,
+            'title': info['name'],
+            'url': info['contentURL'],
+            'description': info.get('description'),
+            'thumbnail': info.get('thumbnailUrl'),
+            'timestamp': parse_iso8601(info.get('uploadDate')),
+            'duration': int_or_none(info.get('duration')),
         }
index 3e7923648992d334357bad7206d745a17313b23e..fc92ff8253734f151fa973ea4979adbe5c6063bf 100644 (file)
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
 import json
 
 from .common import InfoExtractor
@@ -12,32 +11,49 @@ from ..utils import (
 
 
 class EllenTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
+    _TESTS = [{
         'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
         'md5': 'e4af06f3bf0d5f471921a18db5764642',
         'info_dict': {
             'id': '0-7jqrsr18',
             'ext': 'mp4',
             'title': 'What\'s Wrong with These Photos? A Whole Lot',
+            'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
             'timestamp': 1406876400,
             'upload_date': '20140801',
         }
-    }
+    }, {
+        'url': 'http://ellentube.com/videos/0-dvzmabd5/',
+        'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb',
+        'info_dict': {
+            'id': '0-dvzmabd5',
+            'ext': 'mp4',
+            'title': '1 year old twin sister makes her brother laugh',
+            'description': '1 year old twin sister makes her brother laugh',
+            'timestamp': 1419542075,
+            'upload_date': '20141225',
+        }
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
+        video_url = self._html_search_meta('VideoURL', webpage, 'url')
+        title = self._og_search_title(webpage, default=None) or self._search_regex(
+            r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
+        description = self._html_search_meta(
+            'description', webpage, 'description') or self._og_search_description(webpage)
         timestamp = parse_iso8601(self._search_regex(
             r'<span class="publish-date"><time datetime="([^"]+)">',
             webpage, 'timestamp'))
 
         return {
             'id': video_id,
-            'title': self._og_search_title(webpage),
-            'url': self._html_search_meta('VideoURL', webpage, 'url'),
+            'url': video_url,
+            'title': title,
+            'description': description,
             'timestamp': timestamp,
         }
 
@@ -55,8 +71,7 @@ class EllenTVClipsIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
         playlist = self._extract_playlist(webpage)
index 4277202a2eea45afdcd750e3e22e651d5ac9342c..00a69e6312aede6069e062c6abff29137939daa9 100644 (file)
@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import unified_strdate
 
@@ -24,9 +22,7 @@ class ElPaisIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         prefix = self._html_search_regex(
index d09d1c13a70cffb725329f69368f37359d7f7a08..190d9f9adc292bfc33d2085eb9bd057ec4c95502 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 class FKTVIE(InfoExtractor):
     IE_NAME = 'fernsehkritik.tv'
-    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
+    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
 
     _TEST = {
         'url': 'http://fernsehkritik.tv/folge-1',
@@ -26,29 +26,32 @@ class FKTVIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        episode = int(mobj.group('ep'))
+        episode = int(self._match_id(url))
 
-        server = random.randint(2, 4)
-        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
-        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
+        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode
+        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode,
                                                episode)
         playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
                                       'playlist', flags=re.DOTALL)
         files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
-        # TODO: return a single multipart video
+
         videos = []
         for i, _ in enumerate(files, 1):
             video_id = '%04d%d' % (episode, i)
-            video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
+            video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i)
             videos.append({
+                'ext': 'flv',
                 'id': video_id,
                 'url': video_url,
                 'title': clean_html(get_element_by_id('eptitle', start_webpage)),
                 'description': clean_html(get_element_by_id('contentlist', start_webpage)),
                 'thumbnail': video_thumbnail
             })
-        return videos
+        return {
+            '_type': 'multi_video',
+            'entries': videos,
+            'id': 'folge-%s' % episode,
+        }
 
 
 class FKTVPosteckeIE(InfoExtractor):
index 75f180928e2c29f59d2d77c1d80e267f6bc8d0a0..a07d69841f9278b932754b603249dc9f8eea53a0 100644 (file)
@@ -57,8 +57,7 @@ class GameOneIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         og_video = self._og_search_video_url(webpage, secure=False)
index d453ec010937b1815bf3a22d568a70ce818224e6..fed968f5179ebf6159212da5ab75b024b3bc0a03 100644 (file)
@@ -39,7 +39,8 @@ class GDCVaultIE(InfoExtractor):
                 'id': '1015301',
                 'ext': 'flv',
                 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
-            }
+            },
+            'skip': 'Requires login',
         }
     ]
 
index 40b2791c77351a8625894f709187b3ccfb8e1939..7a5bf939237ff45731fd3befca5ad0b7dfc0df1f 100644 (file)
@@ -131,12 +131,13 @@ class GenericIE(InfoExtractor):
         # ooyala video
         {
             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
-            'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
+            'md5': '166dd577b433b4d4ebfee10b0824d8ff',
             'info_dict': {
                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
                 'ext': 'mp4',
                 'title': '2cc213299525360.mov',  # that's what we get
             },
+            'add_ie': ['Ooyala'],
         },
         # google redirect
         {
@@ -146,7 +147,7 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'upload_date': '20130224',
                 'uploader_id': 'TheVerge',
-                'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
+                'description': 're:^Chris Ziegler takes a look at the\.*',
                 'uploader': 'The Verge',
                 'title': 'First Firefox OS phones side-by-side',
             },
@@ -181,6 +182,14 @@ class GenericIE(InfoExtractor):
                 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
             },
         },
+        # BBC iPlayer embeds
+        {
+            'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
+            'info_dict': {
+                'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
+            },
+            'playlist_mincount': 18,
+        },
         # RUTV embed
         {
             'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
@@ -699,9 +708,9 @@ class GenericIE(InfoExtractor):
             r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 
         # Helper method
-        def _playlist_from_matches(matches, getter, ie=None):
+        def _playlist_from_matches(matches, getter=None, ie=None):
             urlrs = orderedSet(
-                self.url_result(self._proto_relative_url(getter(m)), ie)
+                self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
                 for m in matches)
             return self.playlist_result(
                 urlrs, playlist_id=video_id, playlist_title=video_title)
@@ -905,6 +914,11 @@ class GenericIE(InfoExtractor):
             return _playlist_from_matches(
                 matches, getter=unescapeHTML, ie='FunnyOrDie')
 
+        # Look for BBC iPlayer embed
+        matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
+        if matches:
+            return _playlist_from_matches(matches, ie='BBCCoUk')
+
         # Look for embedded RUTV player
         rutv_url = RUTVIE._extract_url(webpage)
         if rutv_url:
@@ -912,7 +926,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded TED player
         mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'TED')
 
diff --git a/youtube_dl/extractor/giga.py b/youtube_dl/extractor/giga.py
new file mode 100644 (file)
index 0000000..7758901
--- /dev/null
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+    qualities,
+    compat_str,
+    parse_duration,
+    parse_iso8601,
+    str_to_int,
+)
+
+
+class GigaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
+        'md5': '6bc5535e945e724640664632055a584f',
+        'info_dict': {
+            'id': '2622086',
+            'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
+            'ext': 'mp4',
+            'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss',
+            'description': 'md5:afdf5862241aded4718a30dff6a57baf',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 578,
+            'timestamp': 1414749706,
+            'upload_date': '20141031',
+            'uploader': 'Robin Schweiger',
+            'view_count': int,
+        },
+    }, {
+        'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            [r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
+            webpage, 'video id')
+
+        playlist = self._download_json(
+            'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
+            % video_id, video_id)[0]
+
+        quality = qualities(['normal', 'hd720'])
+
+        formats = []
+        for format_id in itertools.count(0):
+            fmt = playlist.get(compat_str(format_id))
+            if not fmt:
+                break
+            formats.append({
+                'url': fmt['src'],
+                'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]),
+                'quality': quality(fmt['quality']),
+            })
+        self._sort_formats(formats)
+
+        title = self._html_search_meta(
+            'title', webpage, 'title', fatal=True)
+        description = self._html_search_meta(
+            'description', webpage, 'description')
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        duration = parse_duration(self._search_regex(
+            r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
+            webpage, 'duration', fatal=False))
+
+        timestamp = parse_iso8601(self._search_regex(
+            r'datetime="([^"]+)"', webpage, 'upload date', fatal=False))
+        uploader = self._search_regex(
+            r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
+
+        view_count = str_to_int(self._search_regex(
+            r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'view_count': view_count,
+            'formats': formats,
+        }
index 4ccf6b9b8a82c3ef28c1d9d04dcc6f26ce2a8f8d..a38eae421a9199b578b3a724d205b13e6367c67a 100644 (file)
@@ -39,8 +39,9 @@ class HuffPostIE(InfoExtractor):
         data = self._download_json(api_url, video_id)['data']
 
         video_title = data['title']
-        duration = parse_duration(data['running_time'])
-        upload_date = unified_strdate(data['schedule']['starts_at'])
+        duration = parse_duration(data.get('running_time'))
+        upload_date = unified_strdate(
+            data.get('schedule', {}).get('starts_at') or data.get('segment_start_date_time'))
         description = data.get('description')
 
         thumbnails = []
@@ -59,16 +60,11 @@ class HuffPostIE(InfoExtractor):
             'ext': 'mp4',
             'url': url,
             'vcodec': 'none' if key.startswith('audio/') else None,
-        } for key, url in data['sources']['live'].items()]
-        if data.get('fivemin_id'):
-            fid = data['fivemin_id']
-            fcat = str(int(fid) // 100 + 1)
-            furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4'
-            formats.append({
-                'format': 'fivemin',
-                'url': furl,
-                'preference': 1,
-            })
+        } for key, url in data.get('sources', {}).get('live', {}).items()]
+
+        if not formats and data.get('fivemin_id'):
+            return self.url_result('5min:%s' % data['fivemin_id'])
+
         self._sort_formats(formats)
 
         return {
index 13a53a0cb39f70ed1aaf1713283852cdc3cebeb4..f29df36b5bf6bd7e732ad84cbfd7d3eeb412f5ff 100644 (file)
@@ -16,7 +16,6 @@ class ImdbIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.imdb.com/video/imdb/vi2524815897',
-        'md5': '9f34fa777ade3a6e57a054fdbcb3a068',
         'info_dict': {
             'id': '2524815897',
             'ext': 'mp4',
index 408d00944ceb83e1551c12b5707031c961bb4f5d..08a671fa86a007d3327ef03c257f1b943bd425db 100644 (file)
@@ -22,8 +22,10 @@ class KhanAcademyIE(InfoExtractor):
             'description': 'The perfect cipher',
             'duration': 176,
             'uploader': 'Brit Cruise',
+            'uploader_id': 'khanacademy',
             'upload_date': '20120411',
-        }
+        },
+        'add_ie': ['Youtube'],
     }, {
         'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
         'info_dict': {
index 41fd62009ac16100c1e6bb776028020cd12e9ff2..720bc939bfd4c3a30c9a3709968c6008e6472067 100644 (file)
@@ -10,13 +10,14 @@ from ..utils import int_or_none
 class KontrTubeIE(InfoExtractor):
     IE_NAME = 'kontrtube'
     IE_DESC = 'KontrTube.ru - Труба зовёт'
-    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'
+    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
 
     _TEST = {
         'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
         'md5': '975a991a4926c9a85f383a736a2e6b80',
         'info_dict': {
             'id': '2678',
+            'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
             'ext': 'mp4',
             'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
             'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
@@ -28,21 +29,28 @@ class KontrTubeIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
 
-        webpage = self._download_webpage(url, video_id, 'Downloading page')
+        webpage = self._download_webpage(
+            url, display_id, 'Downloading page')
 
-        video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
-        thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
+        video_url = self._html_search_regex(
+            r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
+        thumbnail = self._html_search_regex(
+            r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
         title = self._html_search_regex(
             r'<title>(.+?)</title>', webpage, 'video title')
-        description = self._html_search_meta('description', webpage, 'video description')
+        description = self._html_search_meta(
+            'description', webpage, 'video description')
 
         mobj = re.search(
-            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
+            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
+            webpage)
         duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
 
         view_count = self._html_search_regex(
-            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
+            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
+            webpage, 'view count', fatal=False)
 
         comment_count = None
         comment_str = self._html_search_regex(
@@ -56,6 +64,7 @@ class KontrTubeIE(InfoExtractor):
 
         return {
             'id': video_id,
+            'display_id': display_id,
             'url': video_url,
             'thumbnail': thumbnail,
             'title': title,
index d72d470aa8dbb532f80f44796354dabd3de92261..9c2fbdd96788aeb4a854777cfcb6c67dff18f0ce 100644 (file)
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
@@ -28,7 +27,6 @@ class LRTIE(InfoExtractor):
         'params': {
             'skip_download': True,  # HLS download
         },
-
     }
 
     def _real_extract(self, url):
@@ -44,7 +42,9 @@ class LRTIE(InfoExtractor):
 
         formats = []
         for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage):
-            data = json.loads(js_to_json(js))
+            data = self._parse_json(js, video_id, transform_source=js_to_json)
+            if 'provider' not in data:
+                continue
             if data['provider'] == 'rtmp':
                 formats.append({
                     'format_id': 'rtmp',
index 78787e8f1cbb067527bd56e3167a921a3541e6d9..3c61a850f296c32861cdfd35095746c2cf1ef4ad 100644 (file)
@@ -105,6 +105,9 @@ class OCWMITIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
                 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
+                'upload_date': '20121109',
+                'uploader_id': 'MIT',
+                'uploader': 'MIT OpenCourseWare',
                 # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
             }
         },
@@ -114,6 +117,9 @@ class OCWMITIE(InfoExtractor):
                 'id': '7K1sB05pE0A',
                 'ext': 'mp4',
                 'title': 'Session 1: Introduction to Derivatives',
+                'upload_date': '20090818',
+                'uploader_id': 'MIT',
+                'uploader': 'MIT OpenCourseWare',
                 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
                 # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
             }
index f5ca74e976bc10ff896bf7e6134a14332c7b131c..c1a482dba39fb98efdb28e85b681565eb58e3f9e 100644 (file)
@@ -1,63 +1,49 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import hashlib
-import json
-import time
-
 from .common import InfoExtractor
 from ..compat import (
-    compat_parse_qs,
-    compat_str,
-)
-from ..utils import (
-    int_or_none,
+    compat_urlparse,
 )
 
 
 class MotorsportIE(InfoExtractor):
     IE_DESC = 'motorsport.com'
-    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
+    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
     _TEST = {
         'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
-        'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
         'info_dict': {
-            'id': '7063',
+            'id': '2-T3WuR-KMM',
             'ext': 'mp4',
             'title': 'Red Bull Racing: 2014 Rules Explained',
-            'duration': 207,
+            'duration': 208,
             'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
-            'uploader': 'rainiere',
-            'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
-        }
+            'uploader': 'mcomstaff',
+            'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ',
+            'upload_date': '20140903',
+            'thumbnail': r're:^https?://.+\.jpg$'
+        },
+        'add_ie': ['Youtube'],
+        'params': {
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
 
-        flashvars_code = self._html_search_regex(
-            r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
-        flashvars = compat_parse_qs(flashvars_code)
-        params = json.loads(flashvars['parameters'][0])
-
-        e = compat_str(int(time.time()) + 24 * 60 * 60)
-        base_video_url = params['location'] + '?e=' + e
-        s = 'h3hg713fh32'
-        h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
-        video_url = base_video_url + '&h=' + h
-
-        uploader = self._html_search_regex(
-            r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
-            'uploader', fatal=False)
+        iframe_path = self._html_search_regex(
+            r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage,
+            'iframe path')
+        iframe = self._download_webpage(
+            compat_urlparse.urljoin(url, iframe_path), display_id,
+            'Downloading iframe')
+        youtube_id = self._search_regex(
+            r'www.youtube.com/embed/(.{11})', iframe, 'youtube id')
 
         return {
-            'id': params['video_id'],
+            '_type': 'url_transparent',
             'display_id': display_id,
-            'title': params['title'],
-            'url': video_url,
-            'description': params.get('description'),
-            'thumbnail': params.get('main_thumb'),
-            'duration': int_or_none(params.get('duration')),
-            'uploader': uploader,
+            'url': 'https://youtube.com/watch?v=%s' % youtube_id,
         }
diff --git a/youtube_dl/extractor/netzkino.py b/youtube_dl/extractor/netzkino.py
new file mode 100644 (file)
index 0000000..93567d1
--- /dev/null
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    int_or_none,
+    js_to_json,
+    parse_iso8601,
+)
+
+
+class NetzkinoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'
+
+    _TEST = {
+        'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
+        'md5': '92a3f8b76f8d7220acce5377ea5d4873',
+        'info_dict': {
+            'id': 'rakete-zum-mond',
+            'ext': 'mp4',
+            'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
+            'comments': 'mincount:3',
+            'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
+            'upload_date': '20120813',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'timestamp': 1344858571,
+            'age_limit': 12,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        category_id = mobj.group('category')
+        video_id = mobj.group('id')
+
+        api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
+        api_info = self._download_json(api_url, video_id)
+        info = next(
+            p for p in api_info['posts'] if p['slug'] == video_id)
+        custom_fields = info['custom_fields']
+
+        production_js = self._download_webpage(
+            'http://www.netzkino.de/beta/dist/production.min.js', video_id,
+            note='Downloading player code')
+        avo_js = self._search_regex(
+            r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
+            production_js, 'URL templates')
+        templates = self._parse_json(
+            avo_js, video_id, transform_source=js_to_json)
+
+        suffix = {
+            'hds': '.mp4/manifest.f4m',
+            'hls': '.mp4/master.m3u8',
+            'pmd': '.mp4',
+        }
+        film_fn = custom_fields['Streaming'][0]
+        formats = [{
+            'format_id': key,
+            'ext': 'mp4',
+            'url': tpl.replace('{}', film_fn) + suffix[key],
+        } for key, tpl in templates.items()]
+        self._sort_formats(formats)
+
+        comments = [{
+            'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
+            'id': c['id'],
+            'author': c['name'],
+            'html': c['content'],
+            'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
+        } for c in info.get('comments', [])]
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'comments': comments,
+            'title': info['title'],
+            'age_limit': int_or_none(custom_fields.get('FSK')[0]),
+            'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
+            'description': clean_html(info.get('content')),
+            'thumbnail': info.get('thumbnail'),
+            'playlist_title': api_info.get('title'),
+            'playlist_id': category_id,
+        }
index 3d35b11ac81286a359a06620d0e08e8fae18043b..c13ff0d650bcd443bf7a6b6d2444215892d72732 100644 (file)
@@ -22,7 +22,11 @@ class NormalbootsIE(InfoExtractor):
             'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
             'uploader': 'JonTron',
             'upload_date': '20140125',
-        }
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):
index 43e8e619f6d0562982097a0e36da5eee58e5c713..321ce5ce707c7006ad1a8f5979e01afab1bddd23 100644 (file)
@@ -72,7 +72,7 @@ class NRKIE(InfoExtractor):
 
 
 class NRKTVIE(InfoExtractor):
-    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})'
+    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
 
     _TESTS = [
         {
@@ -85,7 +85,7 @@ class NRKTVIE(InfoExtractor):
                 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
                 'upload_date': '20140523',
                 'duration': 1741.52,
-            }
+            },
         },
         {
             'url': 'http://tv.nrk.no/program/mdfp15000514',
@@ -97,39 +97,119 @@ class NRKTVIE(InfoExtractor):
                 'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
                 'upload_date': '20140524',
                 'duration': 4605.0,
-            }
+            },
         },
+        {
+            # single playlist video
+            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
+            'md5': 'adbd1dbd813edaf532b0a253780719c2',
+            'info_dict': {
+                'id': 'MSPO40010515-part2',
+                'ext': 'flv',
+                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
+                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                'upload_date': '20150106',
+            },
+            'skip': 'Only works from Norway',
+        },
+        {
+            'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
+            'playlist': [
+                {
+                    'md5': '9480285eff92d64f06e02a5367970a7a',
+                    'info_dict': {
+                        'id': 'MSPO40010515-part1',
+                        'ext': 'flv',
+                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
+                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                        'upload_date': '20150106',
+                    },
+                },
+                {
+                    'md5': 'adbd1dbd813edaf532b0a253780719c2',
+                    'info_dict': {
+                        'id': 'MSPO40010515-part2',
+                        'ext': 'flv',
+                        'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
+                        'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                        'upload_date': '20150106',
+                    },
+                },
+            ],
+            'info_dict': {
+                'id': 'MSPO40010515',
+                'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
+                'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+                'upload_date': '20150106',
+                'duration': 6947.5199999999995,
+            },
+            'skip': 'Only works from Norway',
+        }
     ]
 
+    def _extract_f4m(self, manifest_url, video_id):
+        return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-
-        page = self._download_webpage(url, video_id)
-
-        title = self._html_search_meta('title', page, 'title')
-        description = self._html_search_meta('description', page, 'description')
-        thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
-        upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
-        duration = float_or_none(
-            self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False))
+        part_id = mobj.group('part_id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_meta(
+            'title', webpage, 'title')
+        description = self._html_search_meta(
+            'description', webpage, 'description')
+
+        thumbnail = self._html_search_regex(
+            r'data-posterimage="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+        upload_date = unified_strdate(self._html_search_meta(
+            'rightsfrom', webpage, 'upload date', fatal=False))
+        duration = float_or_none(self._html_search_regex(
+            r'data-duration="([^"]+)"',
+            webpage, 'duration', fatal=False))
+
+        # playlist
+        parts = re.findall(
+            r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage)
+        if parts:
+            entries = []
+            for current_part_id, stream_url, part_title in parts:
+                if part_id and current_part_id != part_id:
+                    continue
+                video_part_id = '%s-part%s' % (video_id, current_part_id)
+                formats = self._extract_f4m(stream_url, video_part_id)
+                entries.append({
+                    'id': video_part_id,
+                    'title': part_title,
+                    'description': description,
+                    'thumbnail': thumbnail,
+                    'upload_date': upload_date,
+                    'formats': formats,
+                })
+            if part_id:
+                if entries:
+                    return entries[0]
+            else:
+                playlist = self.playlist_result(entries, video_id, title, description)
+                playlist.update({
+                    'thumbnail': thumbnail,
+                    'upload_date': upload_date,
+                    'duration': duration,
+                })
+                return playlist
 
         formats = []
 
-        f4m_url = re.search(r'data-media="([^"]+)"', page)
+        f4m_url = re.search(r'data-media="([^"]+)"', webpage)
         if f4m_url:
-            formats.append({
-                'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
-                'format_id': 'f4m',
-                'ext': 'flv',
-            })
+            formats.extend(self._extract_f4m(f4m_url.group(1), video_id))
 
-        m3u8_url = re.search(r'data-hls-media="([^"]+)"', page)
+        m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
         if m3u8_url:
-            formats.append({
-                'url': m3u8_url.group(1),
-                'format_id': 'm3u8',
-            })
+            formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))
 
         self._sort_formats(formats)
 
index 449d4836c3f7c0c8ce220cbcbd447e76cb17ea0f..45716c75d9505c5fcb7e8c6d73ec4feaef298aee 100644 (file)
@@ -26,6 +26,7 @@ class PlayedIE(InfoExtractor):
             'ext': 'flv',
             'title': 'youtube-dl_test_video.mp4',
         },
+        'skip': 'Removed for copyright infringement.',  # oh wow
     }
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py
new file mode 100644 (file)
index 0000000..0d70631
--- /dev/null
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class RadioBremenIE(InfoExtractor):
+    _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)'
+    IE_NAME = 'radiobremen'
+
+    _TEST = {
+        'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720',
+        'info_dict': {
+            'id': '114720',
+            'ext': 'mp4',
+            'duration': 1685,
+            'width': 512,
+            'title': 'buten un binnen vom 22. Dezember',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++',
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id
+        meta_doc = self._download_webpage(
+            meta_url, video_id, 'Downloading metadata')
+        title = self._html_search_regex(
+            r"<h1.*>(?P<title>.+)</h1>", meta_doc, "title")
+        description = self._html_search_regex(
+            r"<p>(?P<description>.*)</p>", meta_doc, "description", fatal=False)
+        duration = parse_duration(self._html_search_regex(
+            r"L&auml;nge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>",
+            meta_doc, "duration", fatal=False))
+
+        page_doc = self._download_webpage(
+            url, video_id, 'Downloading video information')
+        mobj = re.search(
+            r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)",
+            page_doc)
+        video_url = (
+            "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" %
+            (video_id, video_id, mobj.group("secret"), mobj.group('width')))
+
+        formats = [{
+            'url': video_url,
+            'ext': 'mp4',
+            'width': int(mobj.group("width")),
+        }]
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'formats': formats,
+            'thumbnail': mobj.group('thumbnail'),
+        }
index d029b0ec525fc2e1186aba7e1c3dadf9bed981b0..a3ca79f2ccfd2e00c09a4f9b2a9503fa85669b65 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import parse_duration
 
 class RtlXlIE(InfoExtractor):
     IE_NAME = 'rtlxl.nl'
-    _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
+    _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
 
     _TEST = {
         'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
index b72b5a5869ae3ae6044ac210d55eca9df8a5ccf8..5b1c3577a02bb541d912faa6958533086534af01 100644 (file)
@@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor):
         }
 
 
+class RutubeEmbedIE(InfoExtractor):
+    IE_NAME = 'rutube:embed'
+    IE_DESC = 'Rutube embedded videos'
+    _VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
+        'info_dict': {
+            'id': 'a10e53b86e8f349080f718582ce4c661',
+            'ext': 'mp4',
+            'upload_date': '20131223',
+            'uploader_id': '297833',
+            'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
+            'uploader': 'subziro89 ILya',
+            'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
+        },
+        'params': {
+            'skip_download': 'Requires ffmpeg',
+        },
+    }
+
+    def _real_extract(self, url):
+        embed_id = self._match_id(url)
+        webpage = self._download_webpage(url, embed_id)
+
+        canonical_url = self._html_search_regex(
+            r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
+            'Canonical URL')
+        return self.url_result(canonical_url, 'Rutube')
+
+
 class RutubeChannelIE(InfoExtractor):
     IE_NAME = 'rutube:channel'
     IE_DESC = 'Rutube channels'
index c833fc8ee817bce2ce09fce724f67633a54ba459..6446d26dc416703da688386a578f904d24b102a4 100644 (file)
@@ -24,7 +24,7 @@ class SexyKarmaIE(InfoExtractor):
             'title': 'Taking a quick pee.',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': 'wildginger7',
-            'upload_date': '20141007',
+            'upload_date': '20141008',
             'duration': 22,
             'view_count': int,
             'comment_count': int,
@@ -45,6 +45,7 @@ class SexyKarmaIE(InfoExtractor):
             'view_count': int,
             'comment_count': int,
             'categories': list,
+            'age_limit': 18,
         }
     }, {
         'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
@@ -61,6 +62,7 @@ class SexyKarmaIE(InfoExtractor):
             'view_count': int,
             'comment_count': int,
             'categories': list,
+            'age_limit': 18,
         }
     }]
 
@@ -114,4 +116,5 @@ class SexyKarmaIE(InfoExtractor):
             'view_count': view_count,
             'comment_count': comment_count,
             'categories': categories,
+            'age_limit': 18,
         }
diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py
new file mode 100644 (file)
index 0000000..feef33e
--- /dev/null
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    HEADRequest,
+    urlhandle_detect_ext,
+)
+
+
+class SoulAnimeWatchingIE(InfoExtractor):
+    IE_NAME = "soulanime:watching"
+    IE_DESC = "SoulAnime video"
+    _TEST = {
+        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
+        'md5': '05fae04abf72298098b528e98abf4298',
+        'info_dict': {
+            'id': 'seirei-tsukai-no-blade-dance-episode-9',
+            'ext': 'mp4',
+            'title': 'seirei-tsukai-no-blade-dance-episode-9',
+            'description': 'seirei-tsukai-no-blade-dance-episode-9'
+        }
+    }
+    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        domain = mobj.group('domain')
+
+        page = self._download_webpage(url, video_id)
+
+        video_url_encoded = self._html_search_regex(
+            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
+        video_url = "http://www.soul-anime." + domain + video_url_encoded
+
+        ext_req = HEADRequest(video_url)
+        ext_handle = self._request_webpage(
+            ext_req, video_id, note='Determining extension')
+        ext = urlhandle_detect_ext(ext_handle)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': ext,
+            'title': video_id,
+            'description': video_id
+        }
+
+
+class SoulAnimeSeriesIE(InfoExtractor):
+    IE_NAME = "soulanime:series"
+    IE_DESC = "SoulAnime Series"
+
+    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
+
+    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
+
+    _TEST = {
+        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
+        'info_dict': {
+            'id': 'black-rock-shooter-tv'
+        },
+        'playlist_count': 8
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        series_id = mobj.group('id')
+        domain = mobj.group('domain')
+
+        pattern = re.compile(self._EPISODE_REGEX)
+
+        page = self._download_webpage(url, series_id, "Downloading series page")
+        mobj = pattern.findall(page)
+
+        entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj]
+
+        return self.playlist_result(entries, series_id)
index 6c3445d792206395b7a36d016b8a42ad255ea9cc..82675431f863fded8768241e2ad21c4874f8525d 100644 (file)
@@ -57,9 +57,7 @@ class TeacherTubeIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         title = self._html_search_meta('title', webpage, 'title', fatal=True)
index 944177426d5d719d152d3474f5b059410cb27955..10b3b706a9c82ef8398d408a948e72b6c52b31c3 100644 (file)
@@ -13,7 +13,7 @@ from ..compat import (
 class TEDIE(SubtitlesInfoExtractor):
     _VALID_URL = r'''(?x)
         (?P<proto>https?://)
-        (?P<type>www|embed)(?P<urlmain>\.ted\.com/
+        (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
         (
             (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
             |
@@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor):
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url, re.VERBOSE)
-        if m.group('type') == 'embed':
+        if m.group('type').startswith('embed'):
             desktop_url = m.group('proto') + 'www' + m.group('urlmain')
             return self.url_result(desktop_url, 'TED')
         name = m.group('name')
index 6e61cc9e2ecf621b19fe13924456970b091bce1c..025d0877cb928bb433aff9f6eff19a29d253e006 100644 (file)
@@ -1,15 +1,13 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
 class TF1IE(InfoExtractor):
     """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
-    _TEST = {
+    _VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
+    _TESTS = {
         'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
         'info_dict': {
             'id': '10635995',
@@ -21,14 +19,26 @@ class TF1IE(InfoExtractor):
             # Sometimes wat serves the whole file with the --test option
             'skip_download': True,
         },
+    }, {
+        'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
+        'info_dict': {
+            'id': '12043945',
+            'ext': 'mp4',
+            'title': 'Le grand Mystérioso - Chuggington',
+            'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
+            'upload_date': '20150103',
+        },
+        'params': {
+            # Sometimes wat serves the whole file with the --test option
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         embed_url = self._html_search_regex(
-            r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
+            r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url')
         embed_page = self._download_webpage(embed_url, video_id,
                                             'Downloading embed player page')
         wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
index 161e47624b383dfe76ea1e2ea6ec73395a97db53..c89de5ba4a46bb261987d8dbee5f55b3d05492da 100644 (file)
@@ -9,7 +9,7 @@ from .common import InfoExtractor
 
 
 class TudouIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
+    _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
     _TESTS = [{
         'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
         'md5': '140a49ed444bd22f93330985d8475fcb',
@@ -27,13 +27,6 @@ class TudouIE(InfoExtractor):
             'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
             'thumbnail': 're:^https?://.*\.jpg$',
         }
-    }, {
-        'url': 'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html',
-        'info_dict': {
-            'title': 'todo.mp4',
-        },
-        'add_ie': ['Youku'],
-        'skip': 'Only works from China'
     }]
 
     def _url_for_id(self, id, quality=None):
@@ -45,8 +38,7 @@ class TudouIE(InfoExtractor):
         return final_url
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(2)
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
@@ -87,4 +79,9 @@ class TudouIE(InfoExtractor):
             }
             result.append(part_info)
 
-        return result
+        return {
+            '_type': 'multi_video',
+            'entries': result,
+            'id': video_id,
+            'title': title,
+        }
index 4ce5aeeba242b94b78d71e3c9d033aa318b588fb..b6b1f2568f23a6ea9fe8e12c86deb6b30d44a809 100644 (file)
@@ -24,7 +24,7 @@ class TuneInIE(InfoExtractor):
     _INFO_DICT = {
         'id': '34682',
         'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
-        'ext': 'AAC',
+        'ext': 'aac',
         'thumbnail': 're:^https?://.*\.png$',
         'location': 'Tacoma, WA',
     }
@@ -78,14 +78,21 @@ class TuneInIE(InfoExtractor):
         for stream in streams:
             if stream.get('Type') == 'Live':
                 is_live = True
+            reliability = stream.get('Reliability')
+            format_note = (
+                'Reliability: %d%%' % reliability
+                if reliability is not None else None)
             formats.append({
+                'preference': (
+                    0 if reliability is None or reliability > 90
+                    else 1),
                 'abr': stream.get('Bandwidth'),
-                'ext': stream.get('MediaType'),
+                'ext': stream.get('MediaType').lower(),
                 'acodec': stream.get('MediaType'),
                 'vcodec': 'none',
                 'url': stream.get('Url'),
-                # Sometimes streams with the highest quality do not exist
-                'preference': stream.get('Reliability'),
+                'source_preference': reliability,
+                'format_note': format_note,
             })
         self._sort_formats(formats)
 
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py
new file mode 100644 (file)
index 0000000..619039e
--- /dev/null
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class VierIE(InfoExtractor):
+    IE_NAME = 'vier'
+    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
+    _TESTS = [{
+        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
+        'info_dict': {
+            'id': '16129',
+            'display_id': 'het-wordt-warm-de-moestuin',
+            'ext': 'mp4',
+            'title': 'Het wordt warm in De Moestuin',
+            'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.vier.be/video/v3/embed/16129',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        embed_id = mobj.group('embed_id')
+        display_id = mobj.group('display_id') or embed_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'"nid"\s*:\s*"(\d+)"', webpage, 'video id')
+        application = self._search_regex(
+            r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod')
+        filename = self._search_regex(
+            r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename')
+
+        playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
+        formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
+
+        title = self._og_search_title(webpage, default=display_id)
+        description = self._og_search_description(webpage, default=None)
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
+
+
+class VierVideosIE(InfoExtractor):
+    IE_NAME = 'vier:videos'
+    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
+    _TESTS = [{
+        'url': 'http://www.vier.be/demoestuin/videos',
+        'info_dict': {
+            'id': 'demoestuin',
+        },
+        'playlist_mincount': 153,
+    }, {
+        'url': 'http://www.vier.be/demoestuin/videos?page=6',
+        'info_dict': {
+            'id': 'demoestuin-page6',
+        },
+        'playlist_mincount': 20,
+    }, {
+        'url': 'http://www.vier.be/demoestuin/videos?page=7',
+        'info_dict': {
+            'id': 'demoestuin-page7',
+        },
+        'playlist_mincount': 13,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        program = mobj.group('program')
+
+        webpage = self._download_webpage(url, program)
+
+        page_id = mobj.group('page')
+        if page_id:
+            page_id = int(page_id)
+            start_page = page_id
+            last_page = start_page + 1
+            playlist_id = '%s-page%d' % (program, page_id)
+        else:
+            start_page = 0
+            last_page = int(self._search_regex(
+                r'videos\?page=(\d+)">laatste</a>',
+                webpage, 'last page', default=0)) + 1
+            playlist_id = program
+
+        entries = []
+        for current_page_id in range(start_page, last_page):
+            current_page = self._download_webpage(
+                'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
+                program,
+                'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
+            page_entries = [
+                self.url_result('http://www.vier.be' + video_url, 'Vier')
+                for video_url in re.findall(
+                    r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
+            entries.extend(page_entries)
+
+        return self.playlist_result(entries, playlist_id)
index 15f31529822bcba124cfb12bcb9e56566b3bfba7..944901e1482a666ae90cc5e1c0f86e325ec2aecc 100644 (file)
@@ -17,7 +17,6 @@ class VikiIE(SubtitlesInfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
     _TEST = {
         'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
-        'md5': 'a21454021c2646f5433514177e2caa5f',
         'info_dict': {
             'id': '1023585v',
             'ext': 'mp4',
@@ -31,8 +30,7 @@ class VikiIE(SubtitlesInfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         title = self._og_search_title(webpage)
index 33d370e1ceb1f0d6db418c8575830ac60d4294a4..ee3d86117e625cca66303aeeee229f1a091b4602 100644 (file)
@@ -14,28 +14,17 @@ class VimpleIE(InfoExtractor):
     IE_DESC = 'Vimple.ru'
     _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})'
     _TESTS = [
-        # Quality: Large, from iframe
         {
-            'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c',
+            'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
+            'md5': '2e750a330ed211d3fd41821c6ad9a279',
             'info_dict': {
-                'id': 'b132bdfd71b546d3972f9ab9a25f201c',
-                'title': 'great-escape-minecraft.flv',
+                'id': 'c0f6b1687dcd4000a97ebe70068039cf',
                 'ext': 'mp4',
-                'duration': 352,
-                'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c',
+                'title': 'Sunset',
+                'duration': 20,
+                'thumbnail': 're:https?://.*?\.jpg',
             },
         },
-        # Quality: Medium, from mainpage
-        {
-            'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
-            'info_dict': {
-                'id': 'a15950562888453b8e6f9572dc8600cd',
-                'title': 'DB 01',
-                'ext': 'flv',
-                'duration': 1484,
-                'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
-            }
-        },
     ]
 
     def _real_extract(self, url):
index 542e9198ac0e5a9470ad5e16717bf9ca006050af..81e02a6244d83327b05c6b76c490391b97b15f92 100644 (file)
@@ -164,6 +164,14 @@ class VKIE(InfoExtractor):
             self.to_screen('Youtube video detected')
             return self.url_result(m_yt.group(1), 'Youtube')
 
+        m_rutube = re.search(
+            r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
+        if m_rutube is not None:
+            self.to_screen('rutube video detected')
+            rutube_url = self._proto_relative_url(
+                m_rutube.group(1).replace('\\', ''))
+            return self.url_result(rutube_url)
+
         m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
         if m_opts:
             m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
index 88bbbb21967c6807c536ecc5b06d8d8f41095219..c17bebd6e919673d9011de3ac37dfff2929b2cc8 100644 (file)
@@ -10,14 +10,14 @@ from ..utils import (
 
 
 class WashingtonPostIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
+    _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
     _TEST = {
         'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
         'info_dict': {
             'title': 'Sinkhole of bureaucracy',
         },
         'playlist': [{
-            'md5': 'c3f4b4922ffa259243f68e928db2db8c',
+            'md5': '79132cc09ec5309fa590ae46e4cc31bc',
             'info_dict': {
                 'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
                 'ext': 'mp4',
@@ -29,7 +29,7 @@ class WashingtonPostIE(InfoExtractor):
                 'upload_date': '20140322',
             },
         }, {
-            'md5': 'f645a07652c2950cd9134bb852c5f5eb',
+            'md5': 'e1d5734c06865cc504ad99dc2de0d443',
             'info_dict': {
                 'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
                 'ext': 'mp4',
@@ -44,10 +44,9 @@ class WashingtonPostIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        page_id = mobj.group('id')
-
+        page_id = self._match_id(url)
         webpage = self._download_webpage(url, page_id)
+
         title = self._og_search_title(webpage)
         uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
         entries = []
index 8e25ecf280769166a49d18cfd2508bd6d90caa74..45466e31b7445f8dd8da742308dcc69f2ff1152f 100644 (file)
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
+import itertools
 import re
 
 from .common import InfoExtractor
@@ -67,6 +68,10 @@ class WDRIE(InfoExtractor):
                 'upload_date': '20140717',
             },
         },
+        {
+            'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
+            'playlist_mincount': 146,
+        }
     ]
 
     def _real_extract(self, url):
@@ -81,6 +86,27 @@ class WDRIE(InfoExtractor):
                 self.url_result(page_url + href, 'WDR')
                 for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage)
             ]
+
+            if entries:  # Playlist page
+                return self.playlist_result(entries, page_id)
+
+            # Overview page
+            entries = []
+            for page_num in itertools.count(2):
+                hrefs = re.findall(
+                    r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
+                    webpage)
+                entries.extend(
+                    self.url_result(page_url + href, 'WDR')
+                    for href in hrefs)
+                next_url_m = re.search(
+                    r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
+                if not next_url_m:
+                    break
+                next_url = page_url + next_url_m.group(1)
+                webpage = self._download_webpage(
+                    next_url, page_id,
+                    note='Downloading playlist page %d' % page_num)
             return self.playlist_result(entries, page_id)
 
         flashvars = compat_parse_qs(
@@ -172,8 +198,7 @@ class WDRMausIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         param_code = self._html_search_regex(
@@ -224,5 +249,3 @@ class WDRMausIE(InfoExtractor):
             'thumbnail': thumbnail,
             'upload_date': upload_date,
         }
-
-# TODO test _1
diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py
new file mode 100644 (file)
index 0000000..396cf4e
--- /dev/null
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class WebOfStoriesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)'
+    _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
+    _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
+    _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
+    _TESTS = [
+        {
+            'url': 'http://www.webofstories.com/play/hans.bethe/71',
+            'md5': '373e4dd915f60cfe3116322642ddf364',
+            'info_dict': {
+                'id': '4536',
+                'ext': 'mp4',
+                'title': 'The temperature of the sun',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'description': 'Hans Bethe talks about calculating the temperature of the sun',
+                'duration': 238,
+            }
+        },
+        {
+            'url': 'http://www.webofstories.com/play/55908',
+            'md5': '2985a698e1fe3211022422c4b5ed962c',
+            'info_dict': {
+                'id': '55908',
+                'ext': 'mp4',
+                'title': 'The story of Gemmata obscuriglobus',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
+                'duration': 169,
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage)
+        description = self._html_search_meta('description', webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        story_filename = self._search_regex(
+            r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
+        speaker_id = self._search_regex(
+            r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
+        story_id = self._search_regex(
+            r'\.storyId\((\d+)\)', webpage, 'story ID')
+        speaker_type = self._search_regex(
+            r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
+        great_life = self._search_regex(
+            r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
+        is_great_life_series = great_life == 'true'
+        duration = int_or_none(self._search_regex(
+            r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
+
+        # URL building, see: http://www.webofstories.com/scripts/player.js
+        ms_prefix = ''
+        if speaker_type.lower() == 'ms':
+            ms_prefix = 'mini_sites/'
+
+        if is_great_life_series:
+            mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format(
+                self._VIDEO_DOMAIN, speaker_id, story_filename)
+            rtmp_ext = 'flv'
+            streamer = self._GREAT_LIFE_STREAMER
+            play_path = 'stories/{0:}/{1:}'.format(
+                speaker_id, story_filename)
+        else:
+            mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format(
+                self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename)
+            rtmp_ext = 'mp4'
+            streamer = self._USER_STREAMER
+            play_path = 'mp4:{0:}{1:}/{2}.mp4'.format(
+                ms_prefix, speaker_id, story_filename)
+
+        formats = [{
+            'format_id': 'mp4_sd',
+            'url': mp4_url,
+        }, {
+            'format_id': 'rtmp_sd',
+            'page_url': url,
+            'url': streamer,
+            'ext': rtmp_ext,
+            'play_path': play_path,
+        }]
+
+        self._sort_formats(formats)
+
+        return {
+            'id': story_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'description': description,
+            'duration': duration,
+        }
index 6b37bcbc959a8e8b83fee052da18728ca9a9c298..4527567f8fc26c45b091aa868dc77b159576b56c 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 class XHamsterIE(InfoExtractor):
     """Information Extractor for xHamster"""
-    _VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
     _TESTS = [
         {
             'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
@@ -39,7 +39,11 @@ class XHamsterIE(InfoExtractor):
                 'duration': 200,
                 'age_limit': 18,
             }
-        }
+        },
+        {
+            'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
@@ -57,7 +61,8 @@ class XHamsterIE(InfoExtractor):
 
         video_id = mobj.group('id')
         seo = mobj.group('seo')
-        mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo)
+        proto = mobj.group('proto')
+        mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
         webpage = self._download_webpage(mrss_url, video_id)
 
         title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
index cf74d4fd5059af368d72b845c15f3e1415e6481c..e8490b028e53080b8e685be13577a05603a4af9e 100644 (file)
@@ -40,7 +40,7 @@ class XTubeIE(InfoExtractor):
             r'<p class="title">([^<]+)', webpage, 'title')
         video_uploader = self._html_search_regex(
             [r"var\s+contentOwnerId\s*=\s*'([^']+)",
-             r'By:\s*<a href="/community/profile\.php?user=([^"]+)'],
+             r'By:\s*<a href="/community/profile\.php\?user=([^"]+)'],
             webpage, 'uploader', fatal=False)
         video_description = self._html_search_regex(
             r'<p class="fieldsDesc">([^<]+)',
@@ -95,6 +95,7 @@ class XTubeUserIE(InfoExtractor):
         'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
         'info_dict': {
             'id': 'greenshowers',
+            'age_limit': 18,
         },
         'playlist_mincount': 155,
     }
@@ -124,6 +125,7 @@ class XTubeUserIE(InfoExtractor):
         return {
             '_type': 'playlist',
             'id': username,
+            'age_limit': 18,
             'entries': [{
                 '_type': 'url',
                 'url': eurl,
index 7f5aeb25bbe175eddf244ec22bedbc5916b58fea..bc18276d6c7754a812b04c4ae42bc6c021d22627 100644 (file)
@@ -256,7 +256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
-        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
@@ -264,9 +264,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
 
         # Dash mp4 audio
-        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
-        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
-        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
+        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50},
+        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50},
+        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50},
 
         # Dash webm
         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
@@ -287,7 +287,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
+        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
+        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
 
         # Dash webm audio
         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@ -412,7 +414,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'id': 'HtVdAasjOgU',
                 'ext': 'mp4',
                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
-                'description': 'md5:eca57043abae25130f58f655ad9a7771',
+                'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                 'uploader': 'The Witcher',
                 'uploader_id': 'WitcherGame',
                 'upload_date': '20140605',
@@ -736,6 +738,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'format_id': format_id,
                 'url': video_url,
                 'width': int_or_none(r.attrib.get('width')),
+                'height': int_or_none(r.attrib.get('height')),
                 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
                 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
                 'filesize': filesize,
@@ -746,7 +749,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     fo for fo in formats
                     if fo['format_id'] == format_id)
             except StopIteration:
-                f.update(self._formats.get(format_id, {}))
+                f.update(self._formats.get(format_id, {}).items())
                 formats.append(f)
             else:
                 existing_format.update(f)
@@ -1040,6 +1043,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     self.report_warning(
                         'Skipping DASH manifest: %r' % e, video_id)
                 else:
+                    # Hide the formats we found through non-DASH
+                    dash_keys = set(df['format_id'] for df in dash_formats)
+                    for f in formats:
+                        if f['format_id'] in dash_keys:
+                            f['format_id'] = 'nondash-%s' % f['format_id']
+                            f['preference'] = f.get('preference', 0) - 10000
                     formats.extend(dash_formats)
 
         self._sort_formats(formats)
@@ -1199,9 +1208,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         if playlist_id.startswith('RD'):
             # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
-        if playlist_id.startswith('TL'):
-            raise ExtractorError('For downloading YouTube.com top lists, use '
-                                 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
         url = self._TEMPLATE_URL % playlist_id
         page = self._download_webpage(url, playlist_id)
@@ -1247,49 +1253,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
-class YoutubeTopListIE(YoutubePlaylistIE):
-    IE_NAME = 'youtube:toplist'
-    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
-               ' (Example: "yttoplist:music:Top Tracks")')
-    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
-    _TESTS = [{
-        'url': 'yttoplist:music:Trending',
-        'playlist_mincount': 5,
-        'skip': 'Only works for logged-in users',
-    }]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        channel = mobj.group('chann')
-        title = mobj.group('title')
-        query = compat_urllib_parse.urlencode({'title': title})
-        channel_page = self._download_webpage(
-            'https://www.youtube.com/%s' % channel, title)
-        link = self._html_search_regex(
-            r'''(?x)
-                <a\s+href="([^"]+)".*?>\s*
-                <span\s+class="branded-page-module-title-text">\s*
-                <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
-            channel_page, 'list')
-        url = compat_urlparse.urljoin('https://www.youtube.com/', link)
-
-        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
-        ids = []
-        # sometimes the webpage doesn't contain the videos
-        # retry until we get them
-        for i in itertools.count(0):
-            msg = 'Downloading Youtube mix'
-            if i > 0:
-                msg += ', retry #%d' % i
-
-            webpage = self._download_webpage(url, title, msg)
-            ids = orderedSet(re.findall(video_re, webpage))
-            if ids:
-                break
-        url_results = self._ids_to_results(ids)
-        return self.playlist_result(url_results, playlist_title=title)
-
-
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = 'YouTube.com channels'
     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
@@ -1701,3 +1664,20 @@ class YoutubeTruncatedURLIE(InfoExtractor):
             '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
             ' or simply  youtube-dl BaW_jenozKc  .',
             expected=True)
+
+
+class YoutubeTruncatedIDIE(InfoExtractor):
+    IE_NAME = 'youtube:truncated_id'
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'https?://(?:www\.)youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
+
+    _TESTS = [{
+        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        raise ExtractorError(
+            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
+            expected=True)
index 74c76a9a0446482c303f3b4182f3ef2bd4942c0d..98f15177bd6665bd1c6b96a071d59d4b67e5d918 100644 (file)
@@ -119,7 +119,7 @@ class ZDFChannelIE(InfoExtractor):
         'info_dict': {
             'id': '1586442',
         },
-        'playlist_count': 4,
+        'playlist_count': 3,
     }
     _PAGE_SIZE = 50
 
index 21c452141479a492f23c1f7f20a916c347f0a3a4..14006178d7c0a71d73a13d072e1e21304d955cfb 100644 (file)
@@ -109,7 +109,7 @@ def parseOpts(overrideArguments=None):
     kw = {
         'version': __version__,
         'formatter': fmt,
-        'usage': '%prog [options] url [url...]',
+        'usage': '%prog [OPTIONS] URL [URL...]',
         'conflict_handler': 'resolve',
     }
 
@@ -267,10 +267,12 @@ def parseOpts(overrideArguments=None):
         action='store', dest='format', metavar='FORMAT', default=None,
         help=(
             'video format code, specify the order of preference using'
-            ' slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also'
-            ' supported. You can also use the special names "best",'
-            ' "bestvideo", "bestaudio", "worst", "worstvideo" and'
-            ' "worstaudio". By default, youtube-dl will pick the best quality.'
+            ' slashes, as in -f 22/17/18 . '
+            ' Instead of format codes, you can select by extension for the '
+            'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
+            'You can also use the special names "best",'
+            ' "bestvideo", "bestaudio", "worst". '
+            ' By default, youtube-dl will pick the best quality.'
             ' Use commas to download multiple audio formats, such as'
             ' -f  136/137/mp4/bestvideo,140/m4a/bestaudio.'
             ' You can merge the video and audio of two formats into a single'
@@ -300,6 +302,12 @@ def parseOpts(overrideArguments=None):
         '--youtube-skip-dash-manifest',
         action='store_false', dest='youtube_include_dash_manifest',
         help='Do not download the DASH manifest on YouTube videos')
+    video_format.add_option(
+        '--merge-output-format',
+        action='store', dest='merge_output_format', metavar='FORMAT', default=None,
+        help=(
+            'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. '
+            'Ignored if no merge is required'))
 
     subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
     subtitles.add_option(
@@ -443,6 +451,11 @@ def parseOpts(overrideArguments=None):
         '-J', '--dump-single-json',
         action='store_true', dest='dump_single_json', default=False,
         help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
+    verbosity.add_option(
+        '--print-json',
+        action='store_true', dest='print_json', default=False,
+        help='Be quiet and print the video information as JSON (video is still being downloaded).',
+    )
     verbosity.add_option(
         '--newline',
         action='store_true', dest='progress_with_newline', default=False,
index 048525efcaa848948c2c4090bd4570599b5e9759..d1b342c7a6aebe677d7a599ccd2d92b3391e1437 100644 (file)
@@ -80,8 +80,9 @@ class FFmpegPostProcessor(PostProcessor):
 
         files_cmd = []
         for path in input_paths:
-            files_cmd.extend(['-i', encodeFilename(path, True)])
-        cmd = ([self._executable, '-y'] + files_cmd
+            files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
+        cmd = ([encodeFilename(self._executable, True), encodeArgument('-y')] +
+               files_cmd
                + [encodeArgument(o) for o in opts] +
                [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
 
@@ -122,8 +123,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
         try:
             cmd = [
-                self._probe_executable,
-                '-show_streams',
+                encodeFilename(self._probe_executable, True),
+                encodeArgument('-show_streams'),
                 encodeFilename(self._ffmpeg_filename_argument(path), True)]
             handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
             output = handle.communicate()[0]
@@ -520,7 +521,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
 class FFmpegMergerPP(FFmpegPostProcessor):
     def run(self, info):
         filename = info['filepath']
-        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest']
+        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
         self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
         self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
         return True, info
index 2d2703368d8c2974e665985a10d1b6dcace8b235..3f9c5249dbc1b3e2d078db5254b4899b167965eb 100644 (file)
@@ -13,6 +13,7 @@ from .compat import (
     compat_str,
     compat_urllib_request,
 )
+from .utils import make_HTTPS_handler
 from .version import __version__
 
 
@@ -58,9 +59,12 @@ def update_self(to_screen, verbose):
         to_screen('It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
         return
 
+    https_handler = make_HTTPS_handler(False)
+    opener = compat_urllib_request.build_opener(https_handler)
+
     # Check if there is a new version
     try:
-        newversion = compat_urllib_request.urlopen(VERSION_URL).read().decode('utf-8').strip()
+        newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
     except:
         if verbose:
             to_screen(compat_str(traceback.format_exc()))
@@ -72,7 +76,7 @@ def update_self(to_screen, verbose):
 
     # Download and check versions info
     try:
-        versions_info = compat_urllib_request.urlopen(JSON_URL).read().decode('utf-8')
+        versions_info = opener.open(JSON_URL).read().decode('utf-8')
         versions_info = json.loads(versions_info)
     except:
         if verbose:
@@ -120,7 +124,7 @@ def update_self(to_screen, verbose):
             return
 
         try:
-            urlh = compat_urllib_request.urlopen(version['exe'][0])
+            urlh = opener.open(version['exe'][0])
             newcontent = urlh.read()
             urlh.close()
         except (IOError, OSError):
@@ -166,7 +170,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
     # Zip unix package
     elif isinstance(globals().get('__loader__'), zipimporter):
         try:
-            urlh = compat_urllib_request.urlopen(version['bin'][0])
+            urlh = opener.open(version['bin'][0])
             newcontent = urlh.read()
             urlh.close()
         except (IOError, OSError):
index efbe64fb315dba92383f8b1053a80558f4a12d7b..079e8d2c3f5168b3f0af233f3a5377f15e5a46f3 100644 (file)
@@ -205,6 +205,10 @@ def get_element_by_attribute(attribute, value, html):
 
 def clean_html(html):
     """Clean an HTML snippet into a readable string"""
+
+    if html is None:  # Convenience for sanitizing descriptions etc.
+        return html
+
     # Newline vs <br />
     html = html.replace('\n', ' ')
     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
@@ -1550,3 +1554,23 @@ def ytdl_is_updateable():
 def args_to_str(args):
     # Get a short string representation for a subprocess command
     return ' '.join(shlex_quote(a) for a in args)
+
+
+def urlhandle_detect_ext(url_handle):
+    try:
+        url_handle.headers
+        getheader = lambda h: url_handle.headers[h]
+    except AttributeError:  # Python < 3
+        getheader = url_handle.info().getheader
+
+    return getheader('Content-Type').split("/")[1]
+
+
+def age_restricted(content_limit, age_limit):
+    """ Returns True iff the content should be blocked """
+
+    if age_limit is None:  # No limit set
+        return False
+    if content_limit is None:
+        return False  # Content available for everyone
+    return age_limit < content_limit
index 1420af7467586d454973b5a9b244b6931349b927..8c57c7413b4491e3ae6497e2e7bbcb871f556bab 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2014.12.17.2'
+__version__ = '2015.01.09.2'