Merge pull request #8766 from yan12125/dash-detect-ext
authorYen Chi Hsuan <yan12125@gmail.com>
Fri, 11 Mar 2016 13:40:07 +0000 (21:40 +0800)
committerYen Chi Hsuan <yan12125@gmail.com>
Fri, 11 Mar 2016 13:40:07 +0000 (21:40 +0800)
Detect file extensions of DASH formats from their codecs

36 files changed:
AUTHORS
README.md
docs/supportedsites.md
test/test_YoutubeDL.py
youtube_dl/YoutubeDL.py
youtube_dl/downloader/fragment.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/aol.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/cinemassacre.py
youtube_dl/extractor/cnet.py
youtube_dl/extractor/common.py
youtube_dl/extractor/elpais.py
youtube_dl/extractor/engadget.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/fivemin.py
youtube_dl/extractor/freespeech.py
youtube_dl/extractor/iqiyi.py
youtube_dl/extractor/jeuxvideo.py
youtube_dl/extractor/khanacademy.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/mit.py
youtube_dl/extractor/pyvideo.py
youtube_dl/extractor/revision3.py
youtube_dl/extractor/safari.py
youtube_dl/extractor/sexu.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/viki.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/wimp.py
youtube_dl/postprocessor/__init__.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index b51e23f2d6552e570717ebc6520dbcf3a5d17714..b6b47ac5759c4ead1f99ccbcf413a6d064b82f6d 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -161,3 +161,4 @@ Jens Wille
 Robin Houtevelts
 Patrick Griffis
 Aidan Rowe
+mutantmonkey
index d66804a7a19f667f0d8e3b19b707eec46bf2bb8d..a774e73e75ecbf1bc2c8c5c5b45798a66b866eef 100644 (file)
--- a/README.md
+++ b/README.md
@@ -80,6 +80,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      on Windows)
     --flat-playlist                  Do not extract the videos of a playlist,
                                      only list them.
+    --mark-watched                   Mark videos watched (YouTube only)
+    --no-mark-watched                Do not mark videos watched (YouTube only)
     --no-color                       Do not emit color codes in output
 
 ## Network Options:
index 43403233d898ceefba6832fd7b55fb1f6d5f4367..0682e4af041c8d01ca8507679782a403a68d9564 100644 (file)
  - **faz.net**
  - **fc2**
  - **Fczenit**
+ - **features.aol.com**
  - **fernsehkritik.tv**
  - **Firstpost**
  - **FiveTV**
  - **kontrtube**: KontrTube.ru - Труба зовёт
  - **KrasView**: Красвью
  - **Ku6**
+ - **KUSI**
  - **kuwo:album**: 酷我音乐 - 专辑
  - **kuwo:category**: 酷我音乐 - 分类
  - **kuwo:chart**: 酷我音乐 - 排行榜
  - **kuwo:song**: 酷我音乐
  - **la7.tv**
  - **Laola1Tv**
+ - **Le**: 乐视网
  - **Lecture2Go**
  - **Lemonde**
- - **Letv**: 乐视网
+ - **LePlaylist**
  - **LetvCloud**: 乐视云
- - **LetvPlaylist**
- - **LetvTv**
  - **Libsyn**
  - **life:embed**
  - **lifenews**: LIFE | NEWS
  - **Npr**
  - **NRK**
  - **NRKPlaylist**
+ - **NRKSkole**: NRK Skole
  - **NRKTV**: NRK TV and NRK Radio
  - **ntv.ru**
  - **Nuvid**
index 59f7ab49dbe4458b5b821d9fae7d629ffab5db1a..efbee3b711b046f62fbb486375486a9e558e5035 100644 (file)
@@ -502,6 +502,9 @@ class TestYoutubeDL(unittest.TestCase):
         assertRegexpMatches(self, ydl._format_note({
             'vbr': 10,
         }), '^\s*10k$')
+        assertRegexpMatches(self, ydl._format_note({
+            'fps': 30,
+        }), '^30fps$')
 
     def test_postprocessors(self):
         filename = 'post-processor-testfile.mp4'
index dcc867e456db6b0131ed797ae6bfd78f327834dc..b3391088f9003c3a94daa2acd4e130c6f5223cd2 100755 (executable)
@@ -85,6 +85,7 @@ from .extractor import get_info_extractor, gen_extractors
 from .downloader import get_suitable_downloader
 from .downloader.rtmp import rtmpdump_version
 from .postprocessor import (
+    FFmpegFixupM3u8PP,
     FFmpegFixupM4aPP,
     FFmpegFixupStretchedPP,
     FFmpegMergerPP,
@@ -567,7 +568,7 @@ class YoutubeDL(object):
                 elif template_dict.get('height'):
                     template_dict['resolution'] = '%sp' % template_dict['height']
                 elif template_dict.get('width'):
-                    template_dict['resolution'] = '?x%d' % template_dict['width']
+                    template_dict['resolution'] = '%dx?' % template_dict['width']
 
             sanitize = lambda k, v: sanitize_filename(
                 compat_str(v),
@@ -1638,6 +1639,8 @@ class YoutubeDL(object):
                 if fixup_policy is None:
                     fixup_policy = 'detect_or_warn'
 
+                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+
                 stretched_ratio = info_dict.get('stretched_ratio')
                 if stretched_ratio is not None and stretched_ratio != 1:
                     if fixup_policy == 'warn':
@@ -1650,15 +1653,18 @@ class YoutubeDL(object):
                             info_dict['__postprocessors'].append(stretched_pp)
                         else:
                             self.report_warning(
-                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
-                                    info_dict['id'], stretched_ratio))
+                                '%s: Non-uniform pixel ratio (%s). %s'
+                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                     else:
                         assert fixup_policy in ('ignore', 'never')
 
-                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
+                if (info_dict.get('requested_formats') is None and
+                        info_dict.get('container') == 'm4a_dash'):
                     if fixup_policy == 'warn':
-                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
-                            info_dict['id']))
+                        self.report_warning(
+                            '%s: writing DASH m4a. '
+                            'Only some players support this container.'
+                            % info_dict['id'])
                     elif fixup_policy == 'detect_or_warn':
                         fixup_pp = FFmpegFixupM4aPP(self)
                         if fixup_pp.available:
@@ -1666,8 +1672,27 @@ class YoutubeDL(object):
                             info_dict['__postprocessors'].append(fixup_pp)
                         else:
                             self.report_warning(
-                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
-                                    info_dict['id']))
+                                '%s: writing DASH m4a. '
+                                'Only some players support this container. %s'
+                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+                    else:
+                        assert fixup_policy in ('ignore', 'never')
+
+                if (info_dict.get('protocol') == 'm3u8_native' or
+                        info_dict.get('protocol') == 'm3u8' and
+                        self.params.get('hls_prefer_native')):
+                    if fixup_policy == 'warn':
+                        self.report_warning('%s: malformated aac bitstream.' % (
+                            info_dict['id']))
+                    elif fixup_policy == 'detect_or_warn':
+                        fixup_pp = FFmpegFixupM3u8PP(self)
+                        if fixup_pp.available:
+                            info_dict.setdefault('__postprocessors', [])
+                            info_dict['__postprocessors'].append(fixup_pp)
+                        else:
+                            self.report_warning(
+                                '%s: malformated aac bitstream. %s'
+                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                     else:
                         assert fixup_policy in ('ignore', 'never')
 
@@ -1831,7 +1856,9 @@ class YoutubeDL(object):
         if fdict.get('vbr') is not None:
             res += '%4dk' % fdict['vbr']
         if fdict.get('fps') is not None:
-            res += ', %sfps' % fdict['fps']
+            if res:
+                res += ', '
+            res += '%sfps' % fdict['fps']
         if fdict.get('acodec') is not None:
             if res:
                 res += ', '
index 5bc99492bc7b90abdc5c133b3f6c573d54fe9ed3..a5bae96699e0b0f81fd11deab4900fe5ed8b820d 100644 (file)
@@ -99,7 +99,8 @@ class FragmentFD(FileDownloader):
                     state['eta'] = self.calc_eta(
                         start, time_now, estimated_size,
                         state['downloaded_bytes'])
-                state['speed'] = s.get('speed')
+                state['speed'] = s.get('speed') or ctx.get('speed')
+                ctx['speed'] = state['speed']
                 ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
             self._hook_progress(state)
 
index 08b3dc67393c688529ea2b524950f7a6200fd433..98de5ddff3f6903f8af818d5dacba4edec6b99ac 100644 (file)
@@ -23,7 +23,10 @@ from .alphaporno import AlphaPornoIE
 from .animeondemand import AnimeOnDemandIE
 from .anitube import AnitubeIE
 from .anysex import AnySexIE
-from .aol import AolIE
+from .aol import (
+    AolIE,
+    AolFeaturesIE,
+)
 from .allocine import AllocineIE
 from .aparat import AparatIE
 from .appleconnect import AppleConnectIE
@@ -209,10 +212,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
 from .expotv import ExpoTVIE
 from .extremetube import ExtremeTubeIE
-from .facebook import (
-    FacebookIE,
-    FacebookPostIE,
-)
+from .facebook import FacebookIE
 from .faz import FazIE
 from .fc2 import FC2IE
 from .fczenit import FczenitIE
@@ -856,6 +856,7 @@ from .vimeo import (
     VimeoChannelIE,
     VimeoGroupsIE,
     VimeoLikesIE,
+    VimeoOndemandIE,
     VimeoReviewIE,
     VimeoUserIE,
     VimeoWatchLaterIE,
index b51eafc45928f8e6ff4ce571763593f71b715583..b761b2cc4c5d3d4b70766ed56ff5c3529dd39e6b 100644 (file)
@@ -1,24 +1,11 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
 class AolIE(InfoExtractor):
     IE_NAME = 'on.aol.com'
-    _VALID_URL = r'''(?x)
-        (?:
-            aol-video:|
-            http://on\.aol\.com/
-            (?:
-                video/.*-|
-                playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
-            )
-        )
-        (?P<id>[0-9]+)
-        (?:$|\?)
-    '''
+    _VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
 
     _TESTS = [{
         'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@@ -29,42 +16,31 @@ class AolIE(InfoExtractor):
             'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
         },
         'add_ie': ['FiveMin'],
-    }, {
-        'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
-        'info_dict': {
-            'id': '152147',
-            'title': 'Brace Yourself - Today\'s Weirdest News',
-        },
-        'playlist_mincount': 10,
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        playlist_id = mobj.group('playlist_id')
-        if not playlist_id or self._downloader.params.get('noplaylist'):
-            return self.url_result('5min:%s' % video_id)
+        video_id = self._match_id(url)
+        return self.url_result('5min:%s' % video_id)
 
-        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
-        webpage = self._download_webpage(url, playlist_id)
-        title = self._html_search_regex(
-            r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
-        playlist_html = self._search_regex(
-            r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
-            'playlist HTML')
-        entries = [{
-            '_type': 'url',
-            'url': 'aol-video:%s' % m.group('id'),
-            'ie_key': 'Aol',
-        } for m in re.finditer(
-            r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
-            playlist_html)]
+class AolFeaturesIE(InfoExtractor):
+    IE_NAME = 'features.aol.com'
+    _VALID_URL = r'http://features\.aol\.com/video/(?P<id>[^/?#]+)'
 
-        return {
-            '_type': 'playlist',
-            'id': playlist_id,
-            'display_id': mobj.group('playlist_display_id'),
-            'title': title,
-            'entries': entries,
-        }
+    _TESTS = [{
+        'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',
+        'md5': '7db483bb0c09c85e241f84a34238cc75',
+        'info_dict': {
+            'id': '519507715',
+            'ext': 'mp4',
+            'title': 'What To Watch - February 17, 2016',
+        },
+        'add_ie': ['FiveMin'],
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        return self.url_result(self._search_regex(
+            r'<script type="text/javascript" src="(https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js[^"]+)"',
+            webpage, '5min embed url'), 'FiveMin')
index efde7e207bc8d166e80f2a26429797684535d114..3e119e21b39ba2ab6bc504cf1d19a90008bfbd24 100644 (file)
@@ -121,15 +121,18 @@ class ArteTVPlus7IE(InfoExtractor):
                 json_url = compat_parse_qs(
                     compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
         if json_url:
-            return self._extract_from_json_url(json_url, video_id, lang)
-        # Differend kind of embed URL (e.g.
+            title = self._search_regex(
+                r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
+                webpage, 'title', default=None, group='title')
+            return self._extract_from_json_url(json_url, video_id, lang, title=title)
+        # Different kind of embed URL (e.g.
         # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
         embed_url = self._search_regex(
             r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
             webpage, 'embed url', group='url')
         return self.url_result(embed_url)
 
-    def _extract_from_json_url(self, json_url, video_id, lang):
+    def _extract_from_json_url(self, json_url, video_id, lang, title=None):
         info = self._download_json(json_url, video_id)
         player_info = info['videoJsonPlayer']
 
@@ -137,7 +140,7 @@ class ArteTVPlus7IE(InfoExtractor):
         if not upload_date_str:
             upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
 
-        title = player_info['VTI'].strip()
+        title = (player_info.get('VTI') or title or player_info['VID']).strip()
         subtitle = player_info.get('VSU', '').strip()
         if subtitle:
             title += ' - %s' % subtitle
index 6d9cd8abd1545ff09d27c991a7dc7c5d2cc2a872..042c4f2f13757ab3f0c942932ada8e7a01160055 100644 (file)
@@ -21,6 +21,10 @@ class CinemassacreIE(InfoExtractor):
                 'title': '“Angry Video Game Nerd: The Movie” – Trailer',
                 'description': 'md5:fb87405fcb42a331742a0dce2708560b',
             },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
         },
         {
             'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
@@ -31,14 +35,18 @@ class CinemassacreIE(InfoExtractor):
                 'upload_date': '20131002',
                 'title': 'The Mummy’s Hand (1940)',
             },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
         },
         {
             # Youtube embedded video
             'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
-            'md5': 'df4cf8a1dcedaec79a73d96d83b99023',
+            'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9',
             'info_dict': {
                 'id': 'OEVzPCY2T-g',
-                'ext': 'mp4',
+                'ext': 'webm',
                 'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
                 'upload_date': '20061207',
                 'uploader': 'Cinemassacre',
@@ -49,12 +57,12 @@ class CinemassacreIE(InfoExtractor):
         {
             # Youtube embedded video
             'url': 'http://cinemassacre.com/2006/09/01/mckids/',
-            'md5': '6eb30961fa795fedc750eac4881ad2e1',
+            'md5': '7393c4e0f54602ad110c793eb7a6513a',
             'info_dict': {
                 'id': 'FnxsNhuikpo',
-                'ext': 'mp4',
+                'ext': 'webm',
                 'upload_date': '20060901',
-                'uploader': 'Cinemassacre Extras',
+                'uploader': 'Cinemassacre Extra',
                 'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
                 'uploader_id': 'Cinemassacre',
                 'title': 'AVGN: McKids',
@@ -69,7 +77,11 @@ class CinemassacreIE(InfoExtractor):
                 'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
                 'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
                 'upload_date': '20150525',
-            }
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
         }
     ]
 
index 5c3908f72b2f94c5557aaeaaa2e19ec78716c0f0..3cf0bf95b4386b393df28d46367887c2e7952ae9 100644 (file)
@@ -51,9 +51,7 @@ class CNETIE(ThePlatformIE):
             uploader = None
             uploader_id = None
 
-        mpx_account = data['config']['uvpConfig']['default']['mpx_account']
-
-        metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id)
+        metadata = self.get_metadata('kYEXFC/%s' % list(vdata['files'].values())[0], video_id)
         description = vdata.get('description') or metadata.get('description')
         duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
 
@@ -62,7 +60,7 @@ class CNETIE(ThePlatformIE):
         for (fkey, vid) in vdata['files'].items():
             if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
                 continue
-            release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid)
+            release_url = 'http://link.theplatform.com/s/kYEXFC/%s?format=SMIL&mbr=true' % vid
             if fkey == 'hds':
                 release_url += '&manifest=f4m'
             tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
index f337caf20e8d2b24692ac2c01903df8d9580f49b..3936772513a874b866346ee90803b9814f848cb1 100644 (file)
@@ -106,7 +106,7 @@ class InfoExtractor(object):
                     * protocol   The protocol that will be used for the actual
                                  download, lower-case.
                                  "http", "https", "rtsp", "rtmp", "rtmpe",
-                                 "m3u8", or "m3u8_native".
+                                 "m3u8", "m3u8_native" or "http_dash_segments".
                     * preference Order number of this format. If this field is
                                  present and not None, the formats get sorted
                                  by this field, regardless of all other values.
index 00a69e6312aede6069e062c6abff29137939daa9..8c725a4e631860584781b116e72b02dd05813fc2 100644 (file)
@@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):
     _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
     IE_DESC = 'El País'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
         'md5': '98406f301f19562170ec071b83433d55',
         'info_dict': {
@@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):
             'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
             'upload_date': '20140206',
         }
-    }
+    }, {
+        'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
+        'md5': '3bd5b09509f3519d7d9e763179b013de',
+        'info_dict': {
+            'id': '1456340311_668921',
+            'ext': 'mp4',
+            'title': 'Cómo hacer el mejor café con cafetera italiana',
+            'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
+            'upload_date': '20160303',
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         prefix = self._html_search_regex(
-            r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
+            r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
         video_suffix = self._search_regex(
-            r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
+            r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
         video_url = prefix + video_suffix
         thumbnail_suffix = self._search_regex(
-            r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
-            fatal=False)
+            r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
+            webpage, 'thumbnail URL', fatal=False)
         thumbnail = (
             None if thumbnail_suffix is None
             else prefix + thumbnail_suffix)
         title = self._html_search_regex(
-            '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+            (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
+             r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
             webpage, 'title')
-        date_str = self._search_regex(
+        upload_date = unified_strdate(self._search_regex(
             r'<p class="date-header date-int updated"\s+title="([^"]+)">',
-            webpage, 'upload date', fatal=False)
-        upload_date = (None if date_str is None else unified_strdate(date_str))
+            webpage, 'upload date', default=None) or self._html_search_meta(
+            'datePublished', webpage, 'timestamp'))
 
         return {
             'id': video_id,
index e4180701d7d5fe7f538d029e8ffb27235b6135df..e5e57d48518d3dd3999dad650d0c32406079ce33 100644 (file)
@@ -1,21 +1,13 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    url_basename,
-)
 
 
 class EngadgetIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://www.engadget.com/
-        (?:video(?:/5min)?/(?P<id>\d+)|
-            [\d/]+/.*?)
-        '''
+    _VALID_URL = r'https?://www.engadget.com/video/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.engadget.com/video/5min/518153925/',
+        'url': 'http://www.engadget.com/video/518153925/',
         'md5': 'c6820d4828a5064447a4d9fc73f312c9',
         'info_dict': {
             'id': '518153925',
@@ -27,15 +19,4 @@ class EngadgetIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
-        if video_id is not None:
-            return self.url_result('5min:%s' % video_id)
-        else:
-            title = url_basename(url)
-            webpage = self._download_webpage(url, title)
-            ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
-            return {
-                '_type': 'playlist',
-                'title': title,
-                'entries': [self.url_result('5min:%s' % vid) for vid in ids]
-            }
+        return self.url_result('5min:%s' % video_id)
index 6c6c3b1bd460407322aab1f35ddd8e55cefaad17..f5bbd39d2d0e90996c118e3fae325034fc2bbb6d 100644 (file)
@@ -37,7 +37,9 @@ class FacebookIE(InfoExtractor):
                                 video/embed|
                                 story\.php
                             )\?(?:.*?)(?:v|video_id|story_fbid)=|
-                            [^/]+/videos/(?:[^/]+/)?
+                            [^/]+/videos/(?:[^/]+/)?|
+                            [^/]+/posts/|
+                            groups/[^/]+/permalink/
                         )|
                     facebook:
                 )
@@ -50,6 +52,8 @@ class FacebookIE(InfoExtractor):
 
     _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
 
+    _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
+
     _TESTS = [{
         'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
         'md5': '6a40d33c0eccbb1af76cf0485a052659',
@@ -81,6 +85,33 @@ class FacebookIE(InfoExtractor):
             'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
             'uploader': 'Demy de Zeeuw',
         },
+    }, {
+        'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
+        'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
+        'info_dict': {
+            'id': '544765982287235',
+            'ext': 'mp4',
+            'title': '"What are you doing running in the snow?"',
+            'uploader': 'FailArmy',
+        }
+    }, {
+        'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
+        'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
+        'info_dict': {
+            'id': '1035862816472149',
+            'ext': 'mp4',
+            'title': 'What the Flock Is Going On In New Zealand  Credit: ViralHog',
+            'uploader': 'S. Saint',
+        },
+    }, {
+        'note': 'swf params escaped',
+        'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
+        'md5': '97ba073838964d12c70566e0085c2b91',
+        'info_dict': {
+            'id': '10153664894881749',
+            'ext': 'mp4',
+            'title': 'Facebook video #10153664894881749',
+        },
     }, {
         'url': 'https://www.facebook.com/video.php?v=10204634152394104',
         'only_matching': True,
@@ -94,7 +125,7 @@ class FacebookIE(InfoExtractor):
         'url': 'facebook:544765982287235',
         'only_matching': True,
     }, {
-        'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
+        'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
         'only_matching': True,
     }]
 
@@ -164,19 +195,19 @@ class FacebookIE(InfoExtractor):
     def _real_initialize(self):
         self._login()
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        req = sanitized_Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
+    def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
+        req = sanitized_Request(url)
         req.add_header('User-Agent', self._CHROME_USER_AGENT)
         webpage = self._download_webpage(req, video_id)
 
         video_data = None
 
-        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
+        BEFORE = '{swf.addParam(param[0], param[1]);});'
         AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
-        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
+        m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
         if m:
-            data = dict(json.loads(m.group(1)))
+            swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
+            data = dict(json.loads(swf_params))
             params_raw = compat_urllib_parse_unquote(data['params'])
             video_data = json.loads(params_raw)['video_data']
 
@@ -189,13 +220,15 @@ class FacebookIE(InfoExtractor):
 
         if not video_data:
             server_js_data = self._parse_json(self._search_regex(
-                r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id)
+                r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
             for item in server_js_data.get('instances', []):
                 if item[1][0] == 'VideoConfig':
                     video_data = video_data_list2dict(item[2][0]['videoData'])
                     break
 
         if not video_data:
+            if not fatal_if_no_video:
+                return webpage, False
             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
             if m_msg is not None:
                 raise ExtractorError(
@@ -241,39 +274,36 @@ class FacebookIE(InfoExtractor):
             video_title = 'Facebook video #%s' % video_id
         uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
 
-        return {
+        info_dict = {
             'id': video_id,
             'title': video_title,
             'formats': formats,
             'uploader': uploader,
         }
 
-
-class FacebookPostIE(InfoExtractor):
-    IE_NAME = 'facebook:post'
-    _VALID_URL = r'https?://(?:\w+\.)?facebook\.com/[^/]+/posts/(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
-        'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
-        'info_dict': {
-            'id': '544765982287235',
-            'ext': 'mp4',
-            'title': '"What are you doing running in the snow?"',
-            'uploader': 'FailArmy',
-        }
-    }
+        return webpage, info_dict
 
     def _real_extract(self, url):
-        post_id = self._match_id(url)
+        video_id = self._match_id(url)
+
+        real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
+        webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
 
-        webpage = self._download_webpage(url, post_id)
+        if info_dict:
+            return info_dict
 
-        entries = [
-            self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
-            for video_id in self._parse_json(
-                self._search_regex(
-                    r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
-                    webpage, 'video ids', group='ids'),
-                post_id)]
+        if '/posts/' in url:
+            entries = [
+                self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
+                for vid in self._parse_json(
+                    self._search_regex(
+                        r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
+                        webpage, 'video ids', group='ids'),
+                    video_id)]
 
-        return self.playlist_result(entries, post_id)
+            return self.playlist_result(entries, video_id)
+        else:
+            _, info_dict = self._extract_from_url(
+                self._VIDEO_PAGE_TEMPLATE % video_id,
+                video_id, fatal_if_no_video=True)
+            return info_dict
index 2955965d908c15f21b3f8880993530779f97ec15..67d50a386ce812018047f711205b8619d75c1bf8 100644 (file)
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
@@ -16,12 +18,7 @@ from ..utils import (
 
 class FiveMinIE(InfoExtractor):
     IE_NAME = '5min'
-    _VALID_URL = r'''(?x)
-        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
-            https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
-            5min:)
-        (?P<id>\d+)
-        '''
+    _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
 
     _TESTS = [
         {
@@ -45,6 +42,7 @@ class FiveMinIE(InfoExtractor):
                 'title': 'How to Make a Next-Level Fruit Salad',
                 'duration': 184,
             },
+            'skip': 'no longer available',
         },
     ]
     _ERRORS = {
@@ -91,20 +89,33 @@ class FiveMinIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        sid = mobj.group('sid')
+
+        if mobj.group('query'):
+            qs = compat_parse_qs(mobj.group('query'))
+            if not qs.get('playList'):
+                raise ExtractorError('Invalid URL', expected=True)
+            video_id = qs['playList'][0]
+            if qs.get('sid'):
+                sid = qs['sid'][0]
+
         embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
-        embed_page = self._download_webpage(embed_url, video_id,
-                                            'Downloading embed page')
-        sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
-        query = compat_urllib_parse.urlencode({
-            'func': 'GetResults',
-            'playlist': video_id,
-            'sid': sid,
-            'isPlayerSeed': 'true',
-            'url': embed_url,
-        })
+        if not sid:
+            embed_page = self._download_webpage(embed_url, video_id,
+                                                'Downloading embed page')
+            sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
+
         response = self._download_json(
-            'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
+            'https://syn.5min.com/handlers/SenseHandler.ashx?' +
+            compat_urllib_parse.urlencode({
+                'func': 'GetResults',
+                'playlist': video_id,
+                'sid': sid,
+                'isPlayerSeed': 'true',
+                'url': embed_url,
+            }),
             video_id)
         if not response['success']:
             raise ExtractorError(
@@ -118,9 +129,7 @@ class FiveMinIE(InfoExtractor):
         parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
             compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
         for rendition in info['Renditions']:
-            if rendition['RenditionType'] == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
-            elif rendition['RenditionType'] == 'aac':
+            if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
                 continue
             else:
                 rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
index c210177f7297e174d38988a2e62f379a9a478305..1477708bbec14c38bf0db7801d09d68a22ff1546 100644 (file)
@@ -14,7 +14,7 @@ class FreespeechIE(InfoExtractor):
         'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
         'info_dict': {
             'id': 'poKsVCZ64uU',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'Obama, Romney Campaign in Colorado Ahead of Debate',
             'description': 'Obama, Romney Campaign in Colorado Ahead of Debate',
             'uploader': 'freespeechtv',
index d3bee3a1902c783543831fa508d30ce734a140f0..e7c0cb3f66ab542e79f86238d2db991047d6d453 100644 (file)
@@ -501,7 +501,7 @@ class IqiyiIE(InfoExtractor):
     def get_enc_key(self, video_id):
         # TODO: automatic key extraction
         # last update at 2016-01-22 for Zombie::bite
-        enc_key = '6ab6d0280511493ba85594779759d4ed'
+        enc_key = '8ed797d224d043e7ac23d95b70227d32'
         return enc_key
 
     def _extract_playlist(self, webpage):
index eef7daa299813219c5211aefe2051a1160238319..137db873cc09f7e57b258bcf65b8331d8b36b8c0 100644 (file)
@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
         webpage = self._download_webpage(url, title)
         title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
         config_url = self._html_search_regex(
-            r'data-src="(/contenu/medias/video.php.*?)"',
+            r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
             webpage, 'config URL')
         config_url = 'http://www.jeuxvideo.com' + config_url
 
index 08a671fa86a007d3327ef03c257f1b943bd425db..61739efa7a4c3b84892083eab10237c23eb69e3d 100644 (file)
@@ -14,10 +14,10 @@ class KhanAcademyIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.khanacademy.org/video/one-time-pad',
-        'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
+        'md5': '7b391cce85e758fb94f763ddc1bbb979',
         'info_dict': {
             'id': 'one-time-pad',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'The one-time pad',
             'description': 'The perfect cipher',
             'duration': 176,
index 9884362261aa4deab85f8e25718feaaa7a07afce..eada7c299238953baa9fd3d8219b2754aa7f9356 100644 (file)
@@ -14,6 +14,7 @@ from ..utils import (
     xpath_with_ns,
     xpath_text,
     orderedSet,
+    update_url_query,
     int_or_none,
     float_or_none,
     parse_iso8601,
@@ -72,7 +73,10 @@ class LivestreamIE(InfoExtractor):
         for vn in video_nodes:
             tbr = int_or_none(vn.attrib.get('system-bitrate'), 1000)
             furl = (
-                '%s%s?v=3.0.3&fp=WIN%%2014,0,0,145' % (base, vn.attrib['src']))
+                update_url_query(compat_urlparse.urljoin(base, vn.attrib['src']), {
+                    'v': '3.0.3',
+                    'fp': 'WIN% 14,0,0,145',
+                }))
             if 'clipBegin' in vn.attrib:
                 furl += '&ssek=' + vn.attrib['clipBegin']
             formats.append({
index 29ca45778a17654c4d2125ceda177b71cffca8a8..819c1b90bb755c873b3f7f1b64e07dc97126a9b9 100644 (file)
@@ -99,7 +99,7 @@ class OCWMITIE(InfoExtractor):
             'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
             'info_dict': {
                 'id': 'EObHWIEKGjA',
-                'ext': 'mp4',
+                'ext': 'webm',
                 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
                 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
                 'upload_date': '20121109',
index 6d5732d45c3d3e22d085319ff45449881ac73ad2..30a5f2de4475a934cfa467764d0ce559d3e68a74 100644 (file)
@@ -12,14 +12,14 @@ class PyvideoIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
-            'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
+            'md5': '520915673e53a5c5d487c36e0c4d85b5',
             'info_dict': {
                 'id': '24_4WWkSmNo',
-                'ext': 'mp4',
+                'ext': 'webm',
                 'title': 'Become a logging expert in 30 minutes',
                 'description': 'md5:9665350d466c67fb5b1598de379021f7',
                 'upload_date': '20130320',
-                'uploader': 'NextDayVideo',
+                'uploader': 'Next Day Video',
                 'uploader_id': 'NextDayVideo',
             },
             'add_ie': ['Youtube'],
index b1b8800b97c9eb8caad2c03f999f1bc8f304c4da..99979ebe1a9fe82099076b46b576ef38a58bca8c 100644 (file)
@@ -19,7 +19,7 @@ class Revision3IE(InfoExtractor):
         'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
         'md5': 'd94a72d85d0a829766de4deb8daaf7df',
         'info_dict': {
-            'id': '73034',
+            'id': '71089',
             'display_id': 'technobuffalo/5-google-predictions-for-2016',
             'ext': 'webm',
             'title': '5 Google Predictions for 2016',
@@ -31,6 +31,7 @@ class Revision3IE(InfoExtractor):
             'uploader_id': 'technobuffalo',
         }
     }, {
+        # Show
         'url': 'http://testtube.com/brainstuff',
         'info_dict': {
             'id': '251',
@@ -41,7 +42,7 @@ class Revision3IE(InfoExtractor):
     }, {
         'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
         'info_dict': {
-            'id': '60163',
+            'id': '58227',
             'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
             'duration': 275,
             'ext': 'webm',
@@ -52,18 +53,72 @@ class Revision3IE(InfoExtractor):
             'uploader': 'DNews',
             'uploader_id': 'dnews',
         },
+    }, {
+        'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
+        'info_dict': {
+            'id': '71618',
+            'ext': 'mp4',
+            'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
+            'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
+            'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
+            'uploader': 'Editors\' Picks',
+            'uploader_id': 'tt-editors-picks',
+            'timestamp': 1453309200,
+            'upload_date': '20160120',
+        },
+        'add_ie': ['Youtube'],
+    }, {
+        # Tag
+        'url': 'http://testtube.com/tech-news',
+        'info_dict': {
+            'id': '21018',
+            'title': 'tech news',
+        },
+        'playlist_mincount': 9,
     }]
     _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
     _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
 
     def _real_extract(self, url):
         domain, display_id = re.match(self._VALID_URL, url).groups()
+        site = domain.split('.')[0]
         page_info = self._download_json(
             self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
 
-        if page_info['data']['type'] == 'episode':
-            episode_data = page_info['data']
-            video_id = compat_str(episode_data['video']['data']['id'])
+        page_data = page_info['data']
+        page_type = page_data['type']
+        if page_type in ('episode', 'embed'):
+            show_data = page_data['show']['data']
+            page_id = compat_str(page_data['id'])
+            video_id = compat_str(page_data['video']['data']['id'])
+
+            preference = qualities(['mini', 'small', 'medium', 'large'])
+            thumbnails = [{
+                'url': image_url,
+                'id': image_id,
+                'preference': preference(image_id)
+            } for image_id, image_url in page_data.get('images', {}).items()]
+
+            info = {
+                'id': page_id,
+                'display_id': display_id,
+                'title': unescapeHTML(page_data['name']),
+                'description': unescapeHTML(page_data.get('summary')),
+                'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
+                'author': page_data.get('author'),
+                'uploader': show_data.get('name'),
+                'uploader_id': show_data.get('slug'),
+                'thumbnails': thumbnails,
+                'extractor_key': site,
+            }
+
+            if page_type == 'embed':
+                info.update({
+                    '_type': 'url_transparent',
+                    'url': page_data['video']['data']['embed'],
+                })
+                return info
+
             video_data = self._download_json(
                 'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
                 video_id)['items'][0]
@@ -84,36 +139,30 @@ class Revision3IE(InfoExtractor):
                         })
             self._sort_formats(formats)
 
-            preference = qualities(['mini', 'small', 'medium', 'large'])
-            thumbnails = [{
-                'url': image_url,
-                'id': image_id,
-                'preference': preference(image_id)
-            } for image_id, image_url in video_data.get('images', {}).items()]
-
-            return {
-                'id': video_id,
-                'display_id': display_id,
+            info.update({
                 'title': unescapeHTML(video_data['title']),
                 'description': unescapeHTML(video_data.get('summary')),
-                'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '),
-                'author': episode_data.get('author'),
                 'uploader': video_data.get('show', {}).get('name'),
                 'uploader_id': video_data.get('show', {}).get('slug'),
                 'duration': int_or_none(video_data.get('duration')),
-                'thumbnails': thumbnails,
                 'formats': formats,
-            }
+            })
+            return info
         else:
-            show_data = page_info['show']['data']
+            list_data = page_info[page_type]['data']
             episodes_data = page_info['episodes']['data']
             num_episodes = page_info['meta']['totalEpisodes']
             processed_episodes = 0
             entries = []
             page_num = 1
             while True:
-                entries.extend([self.url_result(
-                    'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data])
+                entries.extend([{
+                    '_type': 'url',
+                    'url': 'http://%s%s' % (domain, episode['path']),
+                    'id': compat_str(episode['id']),
+                    'ie_key': 'Revision3',
+                    'extractor_key': site,
+                } for episode in episodes_data])
                 processed_episodes += len(episodes_data)
                 if processed_episodes == num_episodes:
                     break
@@ -123,5 +172,5 @@ class Revision3IE(InfoExtractor):
                     display_id)['episodes']['data']
 
             return self.playlist_result(
-                entries, compat_str(show_data['id']),
-                show_data.get('name'), show_data.get('summary'))
+                entries, compat_str(list_data['id']),
+                list_data.get('name'), list_data.get('summary'))
index 7de7b7273523ea8a43a6d22e8ab684afb4fc5875..a65fc8ed706efa02e4d008494ea8269c10ad8b3a 100644 (file)
@@ -36,12 +36,13 @@ class SafariBaseIE(InfoExtractor):
         if username is None:
             self.raise_login_required('safaribooksonline.com account is required')
 
-        headers = std_headers
+        headers = std_headers.copy()
         if 'Referer' not in headers:
             headers['Referer'] = self._LOGIN_URL
+        login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers)
 
         login_page = self._download_webpage(
-            self._LOGIN_URL, None,
+            login_page_request, None,
             'Downloading login form')
 
         csrf = self._html_search_regex(
index 6365a8779d74e2ac9d82ce83c32c404d51e64b2e..a99b2a8e7be1bc9de8a01d6ae2de6fb36055703c 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -14,7 +12,7 @@ class SexuIE(InfoExtractor):
             'id': '961791',
             'ext': 'mp4',
             'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
-            'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
+            'description': 'md5:2b75327061310a3afb3fbd7d09e2e403',
             'categories': list,  # NSFW
             'thumbnail': 're:https?://.*\.jpg$',
             'age_limit': 18,
@@ -25,13 +23,18 @@ class SexuIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        quality_arr = self._search_regex(
-            r'sources:\s*\[([^\]]+)\]', webpage, 'forrmat string')
+        jwvideo = self._parse_json(
+            self._search_regex(r'\.setup\(\s*({.+?})\s*\);', webpage, 'jwvideo'),
+            video_id)
+
+        sources = jwvideo['sources']
+
         formats = [{
-            'url': fmt[0].replace('\\', ''),
-            'format_id': fmt[1],
-            'height': int(fmt[1][:3]),
-        } for fmt in re.findall(r'"file":"([^"]+)","label":"([^"]+)"', quality_arr)]
+            'url': source['file'].replace('\\', ''),
+            'format_id': source.get('label'),
+            'height': self._search_regex(
+                r'^(\d+)[pP]', source.get('label', ''), 'height', default=None),
+        } for source in sources if source.get('file')]
         self._sort_formats(formats)
 
         title = self._html_search_regex(
@@ -40,9 +43,7 @@ class SexuIE(InfoExtractor):
         description = self._html_search_meta(
             'description', webpage, 'description')
 
-        thumbnail = self._html_search_regex(
-            r'image:\s*"([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
+        thumbnail = jwvideo.get('image')
 
         categories_str = self._html_search_meta(
             'keywords', webpage, 'categories')
index a48d77c309dcd1f9984cd0a6c71b7af574ca5498..cf8851438bb74000abb2692c34607f3137505f1d 100644 (file)
@@ -73,7 +73,7 @@ class TEDIE(InfoExtractor):
         'add_ie': ['Youtube'],
         'info_dict': {
             'id': '_ZG8HBuDjgc',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'Douglas Adams: Parrots the Universe and Everything',
             'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
             'uploader': 'University of California Television (UCTV)',
index 8639293e35a79875c40b3933b5531381c2b3d077..958bf8fff58b4264742871ba019d9f8c5be21adb 100644 (file)
@@ -252,6 +252,7 @@ class TwitchVodIE(TwitchItemBaseIE):
                 self._USHER_BASE, item_id,
                 compat_urllib_parse.urlencode({
                     'allow_source': 'true',
+                    'allow_audio_only': 'true',
                     'allow_spectre': 'true',
                     'player': 'twitchweb',
                     'nauth': access_token['token'],
@@ -431,6 +432,7 @@ class TwitchStreamIE(TwitchBaseIE):
 
         query = {
             'allow_source': 'true',
+            'allow_audio_only': 'true',
             'p': random.randint(1000000, 10000000),
             'player': 'twitchweb',
             'segment_preference': '4',
index 14e945d494cd2f6e5f3b3e6a03ff6ebb076826dd..e148b1ef513321376efe1795056503ea2a8bcad8 100644 (file)
@@ -20,6 +20,7 @@ class VGTVIE(XstreamIE):
         'aftenbladet.no/tv': 'satv',
         'fvn.no/fvntv': 'fvntv',
         'aftenposten.no/webtv': 'aptv',
+        'ap.vgtv.no/webtv': 'aptv',
     }
 
     _APP_NAME_TO_VENDOR = {
@@ -35,7 +36,7 @@ class VGTVIE(XstreamIE):
                     (?P<host>
                         %s
                     )
-                    /
+                    /?
                     (?:
                         \#!/(?:video|live)/|
                         embed?.*id=
@@ -107,19 +108,27 @@ class VGTVIE(XstreamIE):
             'md5': 'fd828cd29774a729bf4d4425fe192972',
             'info_dict': {
                 'id': '21039',
-                'ext': 'mov',
+                'ext': 'mp4',
                 'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
                 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
                 'duration': 66,
                 'timestamp': 1417002452,
                 'upload_date': '20141126',
                 'view_count': int,
-            }
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
         },
         {
             'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
             'only_matching': True,
         },
+        {
+            'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
@@ -144,8 +153,6 @@ class VGTVIE(XstreamIE):
         if len(video_id) == 5:
             if appname == 'bttv':
                 info = self._extract_video_info('btno', video_id)
-            elif appname == 'aptv':
-                info = self._extract_video_info('ap', video_id)
 
         streams = data['streamUrls']
         stream_type = data.get('streamType')
index 433fc9914a1d59fbc6743e8e82815b429a695cc5..e04b814c8cf27755bfe0a86af3d5bf43262bd0da 100644 (file)
@@ -176,13 +176,13 @@ class VikiIE(VikiBaseIE):
     }, {
         # youtube external
         'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
-        'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
+        'md5': '63f8600c1da6f01b7640eee7eca4f1da',
         'info_dict': {
             'id': '50562v',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'Poor Nastya [COMPLETE] - Episode 1',
             'description': '',
-            'duration': 607,
+            'duration': 606,
             'timestamp': 1274949505,
             'upload_date': '20101213',
             'uploader': 'ad14065n',
index 9f282a1da68ac9889f9cfe667a1a8bc7b8b3a71f..71c30d2cde54f11802f1e187160ae48c0ea88423 100644 (file)
@@ -73,15 +73,26 @@ class VimeoIE(VimeoBaseInfoExtractor):
 
     # _VALID_URL matches Vimeo URLs
     _VALID_URL = r'''(?x)
-        https?://
-        (?:(?:www|(?P<player>player))\.)?
-        vimeo(?P<pro>pro)?\.com/
-        (?!channels/[^/?#]+/?(?:$|[?#])|album/)
-        (?:.*?/)?
-        (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
-        (?:videos?/)?
-        (?P<id>[0-9]+)
-        /?(?:[?&].*)?(?:[#].*)?$'''
+                    https?://
+                        (?:
+                            (?:
+                                www|
+                                (?P<player>player)
+                            )
+                            \.
+                        )?
+                        vimeo(?P<pro>pro)?\.com/
+                        (?!channels/[^/?#]+/?(?:$|[?#])|(?:album|ondemand)/)
+                        (?:.*?/)?
+                        (?:
+                            (?:
+                                play_redirect_hls|
+                                moogaloop\.swf)\?clip_id=
+                            )?
+                        (?:videos?/)?
+                        (?P<id>[0-9]+)
+                        /?(?:[?&].*)?(?:[#].*)?$
+                    '''
     IE_NAME = 'vimeo'
     _TESTS = [
         {
@@ -277,9 +288,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
 
     def _real_extract(self, url):
         url, data = unsmuggle_url(url, {})
-        headers = std_headers
+        headers = std_headers.copy()
         if 'http_headers' in data:
-            headers = headers.copy()
             headers.update(data['http_headers'])
         if 'Referer' not in headers:
             headers['Referer'] = url
@@ -294,7 +304,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             url = 'https://vimeo.com/' + video_id
 
         # Retrieve video webpage to extract further information
-        request = sanitized_Request(url, None, headers)
+        request = sanitized_Request(url, headers=headers)
         try:
             webpage = self._download_webpage(request, video_id)
         except ExtractorError as ee:
@@ -498,6 +508,38 @@ class VimeoIE(VimeoBaseInfoExtractor):
         }
 
 
+class VimeoOndemandIE(VimeoBaseInfoExtractor):
+    IE_NAME = 'vimeo:ondemand'
+    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        # ondemand video not available via https://vimeo.com/id
+        'url': 'https://vimeo.com/ondemand/20704',
+        'md5': 'c424deda8c7f73c1dfb3edd7630e2f35',
+        'info_dict': {
+            'id': '105442900',
+            'ext': 'mp4',
+            'title': 'המעבדה - במאי יותם פלדמן',
+            'uploader': 'גם סרטים',
+            'uploader_url': 're:https?://(?:www\.)?vimeo\.com/gumfilms',
+            'uploader_id': 'gumfilms',
+        },
+    }, {
+        'url': 'https://vimeo.com/ondemand/nazmaalik',
+        'only_matching': True,
+    }, {
+        'url': 'https://vimeo.com/ondemand/141692381',
+        'only_matching': True,
+    }, {
+        'url': 'https://vimeo.com/ondemand/thelastcolony/150274832',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        return self.url_result(self._og_search_video_url(webpage), VimeoIE.ie_key())
+
+
 class VimeoChannelIE(VimeoBaseInfoExtractor):
     IE_NAME = 'vimeo:channel'
     _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
index 670a438afff624b5c9f09d0b79fb34fc4e237a7a..d560a4b5e219c2d62cff17da8e47c3cfbb5f87ba 100644 (file)
@@ -142,10 +142,10 @@ class VKIE(InfoExtractor):
             'url': 'https://vk.com/video276849682_170681728',
             'info_dict': {
                 'id': 'V3K4mi0SYkc',
-                'ext': 'mp4',
+                'ext': 'webm',
                 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
                 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
-                'duration': 179,
+                'duration': 178,
                 'upload_date': '20130116',
                 'uploader': "Children's Joy Foundation",
                 'uploader_id': 'thecjf',
index 041ff6c555123d44c97bc63810d2aa7903ec069e..fb0accac744532625c04bb964c1fa031723ed8ff 100644 (file)
@@ -20,7 +20,7 @@ class WimpIE(InfoExtractor):
         'md5': '4e2986c793694b55b37cf92521d12bb4',
         'info_dict': {
             'id': 'clowncar',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'It\'s like a clown car.',
             'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2',
         },
index 0d8ef6ca26c6ef7f1b7b402b387d20eebd3f8a8f..3ea5183999d5ed2adacbb05bccc6af93e8ac6750 100644 (file)
@@ -6,6 +6,7 @@ from .ffmpeg import (
     FFmpegEmbedSubtitlePP,
     FFmpegExtractAudioPP,
     FFmpegFixupStretchedPP,
+    FFmpegFixupM3u8PP,
     FFmpegFixupM4aPP,
     FFmpegMergerPP,
     FFmpegMetadataPP,
@@ -26,6 +27,7 @@ __all__ = [
     'ExecAfterDownloadPP',
     'FFmpegEmbedSubtitlePP',
     'FFmpegExtractAudioPP',
+    'FFmpegFixupM3u8PP',
     'FFmpegFixupM4aPP',
     'FFmpegFixupStretchedPP',
     'FFmpegMergerPP',
index 380bc6f292f2390fdf5dabd131a20184ba8334df..81102f9bba83cbfb299222a5aa5c06fdd579d7e6 100644 (file)
@@ -391,10 +391,6 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
         for (name, value) in metadata.items():
             options.extend(['-metadata', '%s=%s' % (name, value)])
 
-        # https://github.com/rg3/youtube-dl/issues/8350
-        if info.get('protocol') == 'm3u8_native' or info.get('protocol') == 'm3u8' and self._downloader.params.get('hls_prefer_native', False):
-            options.extend(['-bsf:a', 'aac_adtstoasc'])
-
         self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
         self.run_ffmpeg(filename, temp_filename, options)
         os.remove(encodeFilename(filename))
@@ -467,6 +463,21 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
         return [], info
 
 
+class FFmpegFixupM3u8PP(FFmpegPostProcessor):
+    def run(self, info):
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+
+        options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+        self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return [], info
+
+
 class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
     def __init__(self, downloader=None, format=None):
         super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
index adafd601b2178897e069627b6de7e744d6adf3ce..246f5740d2920dcf9ce4c586ded6c6f47767235c 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2016.03.01'
+__version__ = '2016.03.06'