Merge branch 'chirbit' of https://github.com/skypher/youtube-dl into skypher-chirbit
authorSergey M․ <dstftw@gmail.com>
Mon, 23 Feb 2015 13:21:25 +0000 (19:21 +0600)
committerSergey M․ <dstftw@gmail.com>
Mon, 23 Feb 2015 13:21:25 +0000 (19:21 +0600)
26 files changed:
README.md
devscripts/check-porn.py
docs/supportedsites.md
test/test_swfinterp.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/downloader/common.py
youtube_dl/downloader/f4m.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/adobetv.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/common.py
youtube_dl/extractor/defense.py
youtube_dl/extractor/escapist.py
youtube_dl/extractor/gdcvault.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/imgur.py
youtube_dl/extractor/r7.py [new file with mode: 0644]
youtube_dl/extractor/rtve.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/tv4.py [new file with mode: 0644]
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/zapiks.py [new file with mode: 0644]
youtube_dl/utils.py
youtube_dl/version.py

index a2c1483117ccf2145fbdbb484a619e281e1892d5..8ea31d6059f05ac179f3879622af26cfc37ac110 100644 (file)
--- a/README.md
+++ b/README.md
@@ -571,7 +571,7 @@ Support requests for services that **do** purchase the rights to distribute thei
 
 ### How can I detect whether a given URL is supported by youtube-dl?
 
-For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
 
 It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
 
index 216282712c1b38b96c049f74a6cfe8a0fcd30806..6a5bd9eda333246c47064bf84cfc03da09de4caf 100644 (file)
@@ -45,12 +45,12 @@ for test in get_testcases():
 
         RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
 
-    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
-                   or test['info_dict']['age_limit'] != 18):
+    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
+                   test['info_dict']['age_limit'] != 18):
         print('\nPotential missing age_limit check: {0}'.format(test['name']))
 
-    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
-                         and test['info_dict']['age_limit'] == 18):
+    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
+                         test['info_dict']['age_limit'] == 18):
         print('\nPotential false negative: {0}'.format(test['name']))
 
     else:
index f6ba28e7add3448c56b39f598eb23a160e756dac..5fe3e47cd869a1bde22dc8854931f4a6a5a13700 100644 (file)
  - **Turbo**
  - **Tutv**
  - **tv.dfb.de**
+ - **TV4**: tv4.se and tv4play.se
  - **tvigle**: Интернет-телевидение Tvigle.ru
  - **tvp.pl**
  - **tvp.pl:Series**
  - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
  - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
  - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **Zapiks**
  - **ZDF**
  - **ZDFChannel**
  - **zingmp3:album**: mp3.zing.vn albums
index 9f18055e629d3c21826ad8159bdf0ae55409bca2..f1e8998192b131613cb9d26a1167ce35e0a61e9f 100644 (file)
@@ -34,8 +34,8 @@ def _make_testfunc(testfile):
     def test_func(self):
         as_file = os.path.join(TEST_DIR, testfile)
         swf_file = os.path.join(TEST_DIR, test_id + '.swf')
-        if ((not os.path.exists(swf_file))
-                or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
+        if ((not os.path.exists(swf_file)) or
+                os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
             # Recompile
             try:
                 subprocess.check_call([
index 88809783b7e4f60d6df60d030937808d91474f0a..ca7c3f5c6cc1e2fdb0669f2bfb22146510ff7865 100755 (executable)
@@ -308,8 +308,8 @@ class YoutubeDL(object):
                     raise
 
         if (sys.version_info >= (3,) and sys.platform != 'win32' and
-                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
-                and not params.get('restrictfilenames', False)):
+                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
+                not params.get('restrictfilenames', False)):
             # On Python 3, the Unicode filesystem API will throw errors (#1474)
             self.report_warning(
                 'Assuming --restrict-filenames since file system encoding '
@@ -1366,8 +1366,8 @@ class YoutubeDL(object):
         """Download a given list of URLs."""
         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
         if (len(url_list) > 1 and
-                '%' not in outtmpl
-                and self.params.get('max_downloads') != 1):
+                '%' not in outtmpl and
+                self.params.get('max_downloads') != 1):
             raise SameFileError(outtmpl)
 
         for url in url_list:
index eac2a26ec838873e7fe3e99483945a3115e83c33..25ab3fdfeeb880ceb787184f097df101fbc439f9 100644 (file)
@@ -189,14 +189,14 @@ def _real_main(argv=None):
         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
         if opts.outtmpl is not None:
             opts.outtmpl = opts.outtmpl.decode(preferredencoding())
-    outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
-               or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
-               or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
-               or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
-               or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
-               or (opts.useid and '%(id)s.%(ext)s')
-               or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
-               or DEFAULT_OUTTMPL)
+    outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
+               (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
+               (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
+               (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
+               (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
+               (opts.useid and '%(id)s.%(ext)s') or
+               (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
+               DEFAULT_OUTTMPL)
     if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
         parser.error('Cannot download a video and extract audio into the same'
                      ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
index 45e55b99c630ddbe914eeb58e26cadb943453841..3ae90021a28e661ab532a2d42a7c4e0826d1f46f 100644 (file)
@@ -311,14 +311,14 @@ class FileDownloader(object):
         """
 
         nooverwrites_and_exists = (
-            self.params.get('nooverwrites', False)
-            and os.path.exists(encodeFilename(filename))
+            self.params.get('nooverwrites', False) and
+            os.path.exists(encodeFilename(filename))
         )
 
         continuedl_and_exists = (
-            self.params.get('continuedl', False)
-            and os.path.isfile(encodeFilename(filename))
-            and not self.params.get('nopart', False)
+            self.params.get('continuedl', False) and
+            os.path.isfile(encodeFilename(filename)) and
+            not self.params.get('nopart', False)
         )
 
         # Check file already present
index b40ebfa50b9cc5e927b26fbea38d340d466bb758..7b8fe8cf57cfb57672f153512d60c93d4fe18b62 100644 (file)
@@ -325,8 +325,8 @@ class F4mFD(FileDownloader):
                 state['frag_index'] += 1
 
             estimated_size = (
-                (state['downloaded_bytes'] + frag_total_bytes)
-                (state['frag_index'] + 1) * total_frags)
+                (state['downloaded_bytes'] + frag_total_bytes) /
+                (state['frag_index'] + 1) * total_frags)
             time_now = time.time()
             state['total_bytes_estimate'] = estimated_size
             state['elapsed'] = time_now - start
index cf58f080030efbc921acb51399c48cafe39d97f8..c3088fba2e7bad3e6e34ab2855f0f732937ba215 100644 (file)
@@ -365,6 +365,7 @@ from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .pyvideo import PyvideoIE
 from .quickvid import QuickVidIE
+from .r7 import R7IE
 from .radiode import RadioDeIE
 from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
@@ -494,6 +495,7 @@ from .tumblr import TumblrIE
 from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
+from .tv4 import TV4IE
 from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
@@ -615,6 +617,7 @@ from .youtube import (
     YoutubeUserIE,
     YoutubeWatchLaterIE,
 )
+from .zapiks import ZapiksIE
 from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import (
     ZingMp3SongIE,
index 28e07f8b04ed89fe7c79f445f3454adfb04d0561..97d12856092975a094ec18a5fd7ecafef39c255a 100644 (file)
@@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         webpage = self._download_webpage(url, video_id)
 
         player = self._parse_json(
@@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor):
             self._html_search_meta('datepublished', webpage, 'upload date'))
 
         duration = parse_duration(
-            self._html_search_meta('duration', webpage, 'duration')
-            or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
+            self._html_search_meta('duration', webpage, 'duration') or
+            self._search_regex(
+                r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
+                webpage, 'duration', fatal=False))
 
         view_count = str_to_int(self._search_regex(
             r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
index 43e82847ff8eb472bdac0a91c2138220f001b3b6..576f03b5b71115771555e1d8d46f4a108eb9de93 100644 (file)
@@ -11,8 +11,8 @@ from ..utils import (
 
 
 class AppleTrailersIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    _TESTS = [{
         "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
         'info_dict': {
             'id': 'manofsteel',
@@ -63,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):
                 },
             },
         ]
-    }
+    }, {
+        'url': 'http://trailers.apple.com/ca/metropole/autrui/',
+        'only_matching': True,
+    }]
 
     _JSON_RE = r'iTunes.playURL\((.*?)\);'
 
index 08b8ad37c45ae1aa1528274071a250cfd3f27e89..79f6d199b4190664e62cddc59c6c3d46d785d49f 100644 (file)
@@ -391,6 +391,16 @@ class InfoExtractor(object):
             if blocked_iframe:
                 msg += ' Visit %s for more details' % blocked_iframe
             raise ExtractorError(msg, expected=True)
+        if '<title>The URL you requested has been blocked</title>' in content[:512]:
+            msg = (
+                'Access to this webpage has been blocked by Indian censorship. '
+                'Use a VPN or proxy server (with --proxy) to route around it.')
+            block_msg = self._html_search_regex(
+                r'</h1><p>(.*?)</p>',
+                content, 'block message', default=None)
+            if block_msg:
+                msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
+            raise ExtractorError(msg, expected=True)
 
         return content
 
@@ -798,8 +808,8 @@ class InfoExtractor(object):
             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
         for i, media_el in enumerate(media_nodes):
             if manifest_version == '2.0':
-                manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
-                                (media_el.attrib.get('href') or media_el.attrib.get('url')))
+                manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
+                                (media_el.attrib.get('href') or media_el.attrib.get('url')))
             tbr = int_or_none(media_el.attrib.get('bitrate'))
             formats.append({
                 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
@@ -823,7 +833,7 @@ class InfoExtractor(object):
             'url': m3u8_url,
             'ext': ext,
             'protocol': 'm3u8',
-            'preference': -1,
+            'preference': preference - 1 if preference else -1,
             'resolution': 'multiple',
             'format_note': 'Quality selection URL',
         }]
index 2b90bf4fc2fcba04fe7e164602196586713d4225..98e3aedfd08ada1300cbf3114a41022949062402 100644 (file)
@@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor):
             r"flashvars.pvg_id=\"(\d+)\";",
             webpage, 'ID')
 
-        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
-                    + video_id)
+        json_url = (
+            'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
+            video_id)
         info = self._download_json(json_url, title, 'Downloading JSON config')
         video_url = info['renditions'][0]['url']
 
index 4303feccdaedd865e2c769a282173de6a4d5ffce..b49b9869f2356eaa7e09916046a8f4b30f4f5d47 100644 (file)
@@ -22,6 +22,7 @@ class EscapistIE(InfoExtractor):
             'uploader_id': 'the-escapist-presents',
             'uploader': 'The Escapist Presents',
             'title': "Breaking Down Baldur's Gate",
+            'thumbnail': 're:^https?://.*\.jpg$',
         }
     }
 
@@ -30,19 +31,18 @@ class EscapistIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         uploader_id = self._html_search_regex(
-            r"<h1 class='headline'><a href='/videos/view/(.*?)'",
+            r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
             webpage, 'uploader ID', fatal=False)
         uploader = self._html_search_regex(
-            r"<h1 class='headline'>(.*?)</a>",
+            r"<h1\s+class='headline'>(.*?)</a>",
             webpage, 'uploader', fatal=False)
         description = self._html_search_meta('description', webpage)
 
         raw_title = self._html_search_meta('title', webpage, fatal=True)
         title = raw_title.partition(' : ')[2]
 
-        player_url = self._og_search_video_url(webpage, name='player URL')
-        config_url = compat_urllib_parse.unquote(self._search_regex(
-            r'config=(.*)$', player_url, 'config URL'))
+        config_url = compat_urllib_parse.unquote(self._html_search_regex(
+            r'<param\s+name="flashvars"\s+value="config=([^"&]+)', webpage, 'config URL'))
 
         formats = []
 
@@ -81,5 +81,4 @@ class EscapistIE(InfoExtractor):
             'title': title,
             'thumbnail': self._og_search_thumbnail(webpage),
             'description': description,
-            'player_url': player_url,
         }
index fed968f5179ebf6159212da5ab75b024b3bc0a03..05f58f1afa5c06eefb98f6cc0d5bfc0ce1eb5a79 100644 (file)
@@ -7,6 +7,7 @@ from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
+from ..utils import remove_end
 
 
 class GDCVaultIE(InfoExtractor):
@@ -68,7 +69,9 @@ class GDCVaultIE(InfoExtractor):
         akami_url = xml_description.find('./metadata/akamaiHost').text
         slide_video_path = xml_description.find('./metadata/slideVideo').text
         video_formats.append({
-            'url': 'rtmp://' + akami_url + '/' + slide_video_path,
+            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st',
+            'play_path': remove_end(slide_video_path, '.flv'),
+            'ext': 'flv',
             'format_note': 'slide deck video',
             'quality': -2,
             'preference': -2,
@@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor):
         })
         speaker_video_path = xml_description.find('./metadata/speakerVideo').text
         video_formats.append({
-            'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
+            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st',
+            'play_path': remove_end(speaker_video_path, '.flv'),
+            'ext': 'flv',
             'format_note': 'speaker video',
             'quality': -1,
             'preference': -1,
index 8dce96a648a953c893266c7676c992a7aa6eb2d1..875e1bf05ff274a41f46518c48e990954b7e12e5 100644 (file)
@@ -547,7 +547,16 @@ class GenericIE(InfoExtractor):
                 'id': 'aanslagen-kopenhagen',
                 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
             }
-        }
+        },
+        # Zapiks embed
+        {
+            'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
+            'info_dict': {
+                'id': '118046',
+                'ext': 'mp4',
+                'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
+            }
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -1098,6 +1107,12 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Livestream')
 
+        # Look for Zapiks embed
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Zapiks')
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
index b16c7aed0e26c30754224e86553c7af2be4e6e4a..fe5d95e2c9cad488f342233e7ebfd52e42a86de3 100644 (file)
@@ -19,16 +19,16 @@ class ImgurIE(InfoExtractor):
         'info_dict': {
             'id': 'A61SaA1',
             'ext': 'mp4',
-            'title': 'MRW gifv is up and running without any bugs',
-            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
+            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
         },
     }, {
         'url': 'https://imgur.com/A61SaA1',
         'info_dict': {
             'id': 'A61SaA1',
             'ext': 'mp4',
-            'title': 'MRW gifv is up and running without any bugs',
-            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
+            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
         },
     }]
 
diff --git a/youtube_dl/extractor/r7.py b/youtube_dl/extractor/r7.py
new file mode 100644 (file)
index 0000000..976c8fe
--- /dev/null
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    unescapeHTML,
+    int_or_none,
+)
+
+
+class R7IE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://
+                        (?:
+                            (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
+                            noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
+                            player\.r7\.com/video/i/
+                        )
+                        (?P<id>[\da-f]{24})
+                        '''
+    _TESTS = [{
+        'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
+        'md5': '403c4e393617e8e8ddc748978ee8efde',
+        'info_dict': {
+            'id': '54e7050b0cf2ff57e0279389',
+            'ext': 'mp4',
+            'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 98,
+            'like_count': int,
+            'view_count': int,
+        },
+    }, {
+        'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/',
+        'only_matching': True,
+    }, {
+        'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://player.r7.com/video/i/%s' % video_id, video_id)
+
+        item = self._parse_json(js_to_json(self._search_regex(
+            r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id)
+
+        title = unescapeHTML(item['title'])
+        thumbnail = item.get('init', {}).get('thumbUri')
+        duration = None
+
+        statistics = item.get('statistics', {})
+        like_count = int_or_none(statistics.get('likes'))
+        view_count = int_or_none(statistics.get('views'))
+
+        formats = []
+        for format_key, format_dict in item['playlist'][0].items():
+            src = format_dict.get('src')
+            if not src:
+                continue
+            format_id = format_dict.get('format') or format_key
+            if duration is None:
+                duration = format_dict.get('duration')
+            if '.f4m' in src:
+                formats.extend(self._extract_f4m_formats(src, video_id, preference=-1))
+            elif src.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2))
+            else:
+                formats.append({
+                    'url': src,
+                    'format_id': format_id,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'like_count': like_count,
+            'view_count': view_count,
+            'formats': formats,
+        }
index 3469d9578f5317222a404ce8f5918dd133d6f381..e60f85b5b4842d90b49aeec9aa87da8def92d4f9 100644 (file)
@@ -6,6 +6,7 @@ import re
 import time
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
     struct_unpack,
     remove_end,
@@ -96,12 +97,10 @@ class RTVEALaCartaIE(InfoExtractor):
             ).replace('.net.rtve', '.multimedia.cdn.rtve')
             video_path = self._download_webpage(
                 auth_url, video_id, 'Getting video url')
-            # Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
+            # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
             # the right Content-Length header and the mp4 format
-            video_url = (
-                'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
-                '&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
-            )
+            video_url = compat_urlparse.urljoin(
+                'http://mvod1.akcdn.rtve.es/', video_path)
 
         return {
             'id': video_id,
index a73da1c9c0d92657bd90f302b03e9fa8404c2dcf..5793dbc1085a86fdf573a432805be129dc62de94 100644 (file)
@@ -1,8 +1,10 @@
 from __future__ import unicode_literals
 
+import base64
 import re
 
 from .common import InfoExtractor
+from ..utils import qualities
 
 
 class TeamcocoIE(InfoExtractor):
@@ -24,8 +26,8 @@ class TeamcocoIE(InfoExtractor):
             'info_dict': {
                 'id': '19705',
                 'ext': 'mp4',
-                "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
-                "title": "Louis C.K. Interview Pt. 1 11/3/11",
+                'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
+                'title': 'Louis C.K. Interview Pt. 1 11/3/11',
                 'age_limit': 0,
             }
         }
@@ -42,42 +44,39 @@ class TeamcocoIE(InfoExtractor):
         display_id = mobj.group('display_id')
         webpage = self._download_webpage(url, display_id)
 
-        video_id = mobj.group("video_id")
+        video_id = mobj.group('video_id')
         if not video_id:
             video_id = self._html_search_regex(
                 self._VIDEO_ID_REGEXES, webpage, 'video id')
 
-        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
-        data = self._download_xml(
-            data_url, display_id, 'Downloading data webpage')
+        embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
+        embed = self._download_webpage(
+            embed_url, video_id, 'Downloading embed page')
+
+        encoded_data = self._search_regex(
+            r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
+        data = self._parse_json(
+            base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
 
-        qualities = ['500k', '480p', '1000k', '720p', '1080p']
         formats = []
-        for filed in data.findall('files/file'):
-            if filed.attrib.get('playmode') == 'all':
-                # it just duplicates one of the entries
-                break
-            file_url = filed.text
-            m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
+        get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
+        for filed in data['files']:
+            m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
             if m_format is not None:
                 format_id = m_format.group(1)
             else:
-                format_id = filed.attrib['bitrate']
+                format_id = filed['bitrate']
             tbr = (
-                int(filed.attrib['bitrate'])
-                if filed.attrib['bitrate'].isdigit()
+                int(filed['bitrate'])
+                if filed['bitrate'].isdigit()
                 else None)
 
-            try:
-                quality = qualities.index(format_id)
-            except ValueError:
-                quality = -1
             formats.append({
-                'url': file_url,
+                'url': filed['url'],
                 'ext': 'mp4',
                 'tbr': tbr,
                 'format_id': format_id,
-                'quality': quality,
+                'quality': get_quality(format_id),
             })
 
         self._sort_formats(formats)
@@ -86,8 +85,8 @@ class TeamcocoIE(InfoExtractor):
             'id': video_id,
             'display_id': display_id,
             'formats': formats,
-            'title': self._og_search_title(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'description': self._og_search_description(webpage),
+            'title': data['title'],
+            'thumbnail': data.get('thumb', {}).get('href'),
+            'description': data.get('teaser'),
             'age_limit': self._family_friendly_search(webpage),
         }
index 10b3b706a9c82ef8398d408a948e72b6c52b31c3..59678399d5c5ba17894e557830e4c1d3e3236fab 100644 (file)
@@ -83,6 +83,22 @@ class TEDIE(SubtitlesInfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # YouTube video
+        'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
+        'add_ie': ['Youtube'],
+        'info_dict': {
+            'id': 'aFBIPO-P7LM',
+            'ext': 'mp4',
+            'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
+            'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
+            'uploader': 'TEDx Talks',
+            'uploader_id': 'TEDxTalks',
+            'upload_date': '20111216',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     _NATIVE_FORMATS = {
@@ -132,11 +148,16 @@ class TEDIE(SubtitlesInfoExtractor):
 
         talk_info = self._extract_info(webpage)['talks'][0]
 
-        if talk_info.get('external') is not None:
-            self.to_screen('Found video from %s' % talk_info['external']['service'])
+        external = talk_info.get('external')
+        if external:
+            service = external['service']
+            self.to_screen('Found video from %s' % service)
+            ext_url = None
+            if service.lower() == 'youtube':
+                ext_url = external.get('code')
             return {
                 '_type': 'url',
-                'url': talk_info['external']['uri'],
+                'url': ext_url or external['uri'],
             }
 
         formats = [{
diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py
new file mode 100644 (file)
index 0000000..1c4b6d6
--- /dev/null
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+)
+
+
+class TV4IE(InfoExtractor):
+    IE_DESC = 'tv4.se and tv4play.se'
+    _VALID_URL = r'''(?x)https?://(?:www\.)?
+        (?:
+            tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
+            tv4play\.se/
+            (?:
+                (?:program|barn)/(?:[^\?]+)\?video_id=|
+                iframe/video/|
+                film/|
+                sport/|
+            )
+        )(?P<id>[0-9]+)'''
+    _TESTS = [
+        {
+            'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
+            'md5': '909d6454b87b10a25aa04c4bdd416a9b',
+            'info_dict': {
+                'id': '2491650',
+                'ext': 'mp4',
+                'title': 'Kalla Fakta 5 (english subtitles)',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'timestamp': int,
+                'upload_date': '20131125',
+            },
+        },
+        {
+            'url': 'http://www.tv4play.se/iframe/video/3054113',
+            'md5': '77f851c55139ffe0ebd41b6a5552489b',
+            'info_dict': {
+                'id': '3054113',
+                'ext': 'mp4',
+                'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
+                'timestamp': int,
+                'upload_date': '20150130',
+            },
+        },
+        {
+            'url': 'http://www.tv4play.se/sport/3060959',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.tv4play.se/film/2378136',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        info = self._download_json(
+            'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON')
+
+        # If is_geo_restricted is true, it doesn't neceserally mean we can't download it
+        if info['is_geo_restricted']:
+            self.report_warning('This content might not be available in your country due to licensing restrictions.')
+        if info['requires_subscription']:
+            raise ExtractorError('This content requires subscription.', expected=True)
+
+        sources_data = self._download_json(
+            'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON')
+        sources = sources_data['playback']
+
+        formats = []
+        for item in sources.get('items', {}).get('item', []):
+            ext, bitrate = item['mediaFormat'], item['bitrate']
+            formats.append({
+                'format_id': '%s_%s' % (ext, bitrate),
+                'tbr': bitrate,
+                'ext': ext,
+                'url': item['url'],
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': info['title'],
+            'formats': formats,
+            'description': info.get('description'),
+            'timestamp': parse_iso8601(info.get('broadcast_date_time')),
+            'duration': info.get('duration'),
+            'thumbnail': info.get('image'),
+            'is_live': sources.get('live'),
+        }
index 78d287e0e66d5d0972c3d48edb890673413085ea..4cd2f73d9962529db31e259011db9ccbfe053ac4 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import json
 import re
 import itertools
+import hashlib
 
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
@@ -225,6 +226,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
         if mobj.group('pro') or mobj.group('player'):
             url = 'http://player.vimeo.com/video/' + video_id
 
+        password = self._downloader.params.get('videopassword', None)
+        if password:
+            headers['Cookie'] = '%s_password=%s' % (
+                video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
+
         # Retrieve video webpage to extract further information
         request = compat_urllib_request.Request(url, None, headers)
         try:
diff --git a/youtube_dl/extractor/zapiks.py b/youtube_dl/extractor/zapiks.py
new file mode 100644 (file)
index 0000000..22a9a57
--- /dev/null
@@ -0,0 +1,110 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+    xpath_with_ns,
+    xpath_text,
+    int_or_none,
+)
+
+
+class ZapiksIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
+    _TESTS = [
+        {
+            'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
+            'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
+            'info_dict': {
+                'id': '80798',
+                'ext': 'mp4',
+                'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
+                'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 528,
+                'timestamp': 1359044972,
+                'upload_date': '20130124',
+                'view_count': int,
+                'comment_count': int,
+            },
+        },
+        {
+            'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
+            'only_matching': True,
+        },
+        {
+            'url': 'http://www.zapiks.fr/index.php?action=playerIframe&amp;media_id=118046&amp;width=640&amp;height=360&amp;autoStart=false&amp;language=fr',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id') or video_id
+
+        webpage = self._download_webpage(url, display_id)
+
+        if not video_id:
+            video_id = self._search_regex(
+                r'data-media-id="(\d+)"', webpage, 'video id')
+
+        playlist = self._download_xml(
+            'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
+            display_id)
+
+        NS_MAP = {
+            'jwplayer': 'http://rss.jwpcdn.com/'
+        }
+
+        def ns(path):
+            return xpath_with_ns(path, NS_MAP)
+
+        item = playlist.find('./channel/item')
+
+        title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
+        description = self._og_search_description(webpage, default=None)
+        thumbnail = xpath_text(
+            item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration', default=None))
+        timestamp = parse_iso8601(self._html_search_meta(
+            'uploadDate', webpage, 'upload date', default=None), ' ')
+
+        view_count = int_or_none(self._search_regex(
+            r'UserPlays:(\d+)', webpage, 'view count', default=None))
+        comment_count = int_or_none(self._search_regex(
+            r'UserComments:(\d+)', webpage, 'comment count', default=None))
+
+        formats = []
+        for source in item.findall(ns('./jwplayer:source')):
+            format_id = source.attrib['label']
+            f = {
+                'url': source.attrib['file'],
+                'format_id': format_id,
+            }
+            m = re.search(r'^(?P<height>\d+)[pP]', format_id)
+            if m:
+                f['height'] = int(m.group('height'))
+            formats.append(f)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'formats': formats,
+        }
index 238b6556beb514f2cc4755f4ad767c44509d68f6..475fad3c903f9a2923def9f186c746d067807a68 100644 (file)
@@ -900,8 +900,8 @@ def _windows_write_string(s, out):
     def not_a_console(handle):
         if handle == INVALID_HANDLE_VALUE or handle is None:
             return True
-        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
-                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
+                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
 
     if not_a_console(h):
         return False
index 537e8cf6060ad7cfb36fab1e573a346bde41fe0e..7c8b29c3b852436732a4f5a430fd0dae73fa8fa6 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.02.19.3'
+__version__ = '2015.02.21'