Merge pull request #8228 from jaimeMF/disable-file-handler

author Sergey M <dstftw@gmail.com>
Thu, 14 Jan 2016 17:20:02 +0000 (22:20 +0500)
committer Sergey M <dstftw@gmail.com>
Thu, 14 Jan 2016 17:20:02 +0000 (22:20 +0500)
diff --git a/AUTHORS b/AUTHORS

index 3d8bebbb06098836809857316233a9007a69840f..f4238e1d34112356ef212116b9c3a1854316b3f2 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -152,3 +152,4 @@ Evan Lu
  flatgreen
  Brian Foley
  Vignesh Venkat
+Tom Gijselinck
diff --git a/docs/supportedsites.md b/docs/supportedsites.md

index 8d0c7b97a3c2dff65f9838c8a2f0c3f36837eee8..eb160bd2fafac9d80b297fd0089ef4d966f44885 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -65,6 +65,7 @@
   - **Beeg**
   - **BehindKink**
   - **Bet**
+ - **Bigflix**
   - **Bild**: Bild.de
   - **BiliBili**
   - **BleacherReport**
@@ -251,7 +252,7 @@
   - **Instagram**
   - **instagram:user**: Instagram user profile
   - **InternetVideoArchive**
- - **IPrima**
+ - **IPrima** (Currently broken)
   - **iqiyi**: 爱奇艺
   - **Ir90Tv**
   - **ivi**: ivi.ru
@@ -602,7 +603,9 @@
   - **TruTube**
   - **Tube8**
   - **TubiTv**
- - **Tudou**
+ - **tudou**
+ - **tudou:album**
+ - **tudou:playlist**
   - **Tumblr**
   - **tunein:clip**
   - **tunein:program**
@@ -655,12 +658,12 @@
   - **video.mit.edu**
   - **VideoDetective**
   - **videofy.me**
- - **VideoMega**
+ - **VideoMega** (Currently broken)
   - **videomore**
   - **videomore:season**
   - **videomore:video**
   - **VideoPremium**
- - **VideoTt**: video.tt - Your True Tube
+ - **VideoTt**: video.tt - Your True Tube (Currently broken)
   - **videoweed**: VideoWeed
   - **Vidme**
   - **Vidzi**
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index e4ae9332d28ccf180cca39bf852547489dc03db4..5621c9eb0ffd5556077cf626be261df237fb7e4a 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -86,6 +86,7 @@ from .camdemy import (
  )
  from .canalplus import CanalplusIE
  from .canalc2 import Canalc2IE
+from .canvas import CanvasIE
  from .cbs import CBSIE
  from .cbsnews import CBSNewsIE
  from .cbssports import CBSSportsIE
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py

index c8d921daf1c7c25d548945fbb104cb5b7181a538..34c2a756fba11f81516e87e095ef1e02d5e65417 100644 (file)
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@@ -34,7 +34,7 @@ class BeegIE(InfoExtractor):
          video_id = self._match_id(url)
  
          video = self._download_json(
-            'http://beeg.com/api/v5/video/%s' % video_id, video_id)
+            'https://api.beeg.com/api/v5/video/%s' % video_id, video_id)
  
          def split(o, e):
              def cut(s, x):
@@ -60,7 +60,7 @@ class BeegIE(InfoExtractor):
  
          def decrypt_url(encrypted_url):
              encrypted_url = self._proto_relative_url(
-                encrypted_url.replace('{DATA_MARKERS}', ''), 'http:')
+                encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')
              key = self._search_regex(
                  r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
              if not key:
diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py

new file mode 100644 (file)

index 0000000..ee19ff8
--- /dev/null
+++ b/youtube_dl/extractor/canvas.py
@@ -0,0 +1,65 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class CanvasIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
+        'md5': 'ea838375a547ac787d4064d8c7860a6c',
+        'info_dict': {
+            'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
+            'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
+            'ext': 'mp4',
+            'title': 'De afspraak veilt voor de Warmste Week',
+            'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 49.02,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._search_regex(
+            r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
+            webpage, 'title', default=None) or self._og_search_title(webpage)
+
+        video_id = self._html_search_regex(
+            r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id')
+
+        data = self._download_json(
+            'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id)
+
+        formats = []
+        for target in data['targetUrls']:
+            format_url, format_type = target.get('url'), target.get('type')
+            if not format_url or not format_type:
+                continue
+            if format_type == 'HLS':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, display_id, entry_protocol='m3u8_native',
+                    ext='mp4', preference=0, fatal=False, m3u8_id=format_type))
+            elif format_type == 'HDS':
+                formats.extend(self._extract_f4m_formats(
+                    format_url, display_id, f4m_id=format_type, fatal=False))
+            else:
+                formats.append({
+                    'format_id': format_type,
+                    'url': format_url,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'formats': formats,
+            'duration': float_or_none(data.get('duration'), 1000),
+            'thumbnail': data.get('posterImageUrl'),
+        }
diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py

index d2cfe096192f6a44fd0f9dad2ece1474a0d845ab..a83e85cb8109ef44468851355f2b522e22fc5831 100644 (file)
--- a/youtube_dl/extractor/ntvde.py
+++ b/youtube_dl/extractor/ntvde.py
@@ -2,6 +2,7 @@
  from __future__ import unicode_literals
  
  from .common import InfoExtractor
+from ..compat import compat_urlparse
  from ..utils import (
      int_or_none,
      js_to_json,
@@ -34,7 +35,7 @@ class NTVDeIE(InfoExtractor):
          webpage = self._download_webpage(url, video_id)
  
          info = self._parse_json(self._search_regex(
-            r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'),
+            r'(?s)ntv\.pageInfo\.article\s*=\s*(\{.*?\});', webpage, 'info'),
              video_id, transform_source=js_to_json)
          timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
          vdata = self._parse_json(self._search_regex(
@@ -42,18 +43,24 @@ class NTVDeIE(InfoExtractor):
              webpage, 'player data'),
              video_id, transform_source=js_to_json)
          duration = parse_duration(vdata.get('duration'))
-        formats = [{
-            'format_id': 'flash',
-            'url': 'rtmp://fms.n-tv.de/' + vdata['video'],
-        }, {
-            'format_id': 'mobile',
-            'url': 'http://video.n-tv.de' + vdata['videoMp4'],
-            'tbr': 400,  # estimation
-        }]
-        m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8']
-        formats.extend(self._extract_m3u8_formats(
-            m3u8_url, video_id, ext='mp4',
-            entry_protocol='m3u8_native', preference=0))
+
+        formats = []
+        if vdata.get('video'):
+            formats.append({
+                'format_id': 'flash',
+                'url': 'rtmp://fms.n-tv.de/%s' % vdata['video'],
+            })
+        if vdata.get('videoMp4'):
+            formats.append({
+                'format_id': 'mobile',
+                'url': compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoMp4']),
+                'tbr': 400,  # estimation
+            })
+        if vdata.get('videoM3u8'):
+            m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8'])
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+                preference=0, m3u8_id='hls', fatal=False))
          self._sort_formats(formats)
  
          return {
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py

index 2e6c9872b5d251be4eb3c61addab113aad4d2416..c54775d54ab628203beb41c0dd52705c35821a55 100644 (file)
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -170,7 +170,21 @@ class ORFOE1IE(InfoExtractor):
  class ORFFM4IE(InfoExtractor):
      IE_NAME = 'orf:fm4'
      IE_DESC = 'radio FM4'
-    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
+    _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)'
+
+    _TEST = {
+        'url': 'http://fm4.orf.at/player/20160110/IS/',
+        'md5': '01e736e8f1cef7e13246e880a59ad298',
+        'info_dict': {
+            'id': '2016-01-10_2100_tl_54_7DaysSun13_11244',
+            'ext': 'mp3',
+            'title': 'Im Sumpf',
+            'description': 'md5:384c543f866c4e422a55f66a62d669cd',
+            'duration': 7173,
+            'timestamp': 1452456073,
+            'upload_date': '20160110',
+        },
+    }
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py

index baa54a3afd10244ef2cda281c0785b24fe19e9cb..670e6950f3fcc67e78ecd466475e7317c8dfd826 100644 (file)
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -20,7 +20,7 @@ from ..utils import (
  class ProSiebenSat1IE(InfoExtractor):
      IE_NAME = 'prosiebensat1'
      IE_DESC = 'ProSiebenSat.1 Digital'
-    _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
  
      _TESTS = [
          {
@@ -32,7 +32,7 @@ class ProSiebenSat1IE(InfoExtractor):
              'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
              'info_dict': {
                  'id': '2104602',
-                'ext': 'mp4',
+                'ext': 'flv',
                  'title': 'Episode 18 - Staffel 2',
                  'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
                  'upload_date': '20131231',
@@ -138,14 +138,13 @@ class ProSiebenSat1IE(InfoExtractor):
              'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
              'info_dict': {
                  'id': '2572814',
-                'ext': 'mp4',
+                'ext': 'flv',
                  'title': 'Andreas Kümmert: Rocket Man',
                  'description': 'md5:6ddb02b0781c6adf778afea606652e38',
                  'upload_date': '20131017',
                  'duration': 469.88,
              },
              'params': {
-                # rtmp download
                  'skip_download': True,
              },
          },
@@ -153,13 +152,12 @@ class ProSiebenSat1IE(InfoExtractor):
              'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
              'info_dict': {
                  'id': '2156342',
-                'ext': 'mp4',
+                'ext': 'flv',
                  'title': 'Kurztrips zum Valentinstag',
-                'description': 'Romantischer Kurztrip zum Valentinstag? Wir verraten, was sich hier wirklich lohnt.',
+                'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
                  'duration': 307.24,
              },
              'params': {
-                # rtmp download
                  'skip_download': True,
              },
          },
@@ -172,12 +170,26 @@ class ProSiebenSat1IE(InfoExtractor):
              },
              'playlist_count': 2,
          },
+        {
+            'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
+            'info_dict': {
+                'id': '4187506',
+                'ext': 'flv',
+                'title': 'Best of Circus HalliGalli',
+                'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
+                'upload_date': '20151229',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
      ]
  
      _CLIPID_REGEXES = [
          r'"clip_id"\s*:\s+"(\d+)"',
          r'clipid: "(\d+)"',
          r'clip[iI]d=(\d+)',
+        r'clip[iI]d\s*=\s*["\'](\d+)',
          r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
      ]
      _TITLE_REGEXES = [
@@ -186,12 +198,16 @@ class ProSiebenSat1IE(InfoExtractor):
          r'<!-- start video -->\s*<h1>(.+?)</h1>',
          r'<h1 class="att-name">\s*(.+?)</h1>',
          r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
+        r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
+        r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
      ]
      _DESCRIPTION_REGEXES = [
          r'<p itemprop="description">\s*(.+?)</p>',
          r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
          r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
          r'<p class="att-description">\s*(.+?)\s*</p>',
+        r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
+        r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
      ]
      _UPLOAD_DATE_REGEXES = [
          r'<meta property="og:published_time" content="(.+?)">',
diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py

index f70978299ac9e682f5cdb99a7396541fd08c115c..594bee4f9a681f928f37887270b062b6e7079514 100644 (file)
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dl/extractor/unistra.py
@@ -38,7 +38,7 @@ class UnistraIE(InfoExtractor):
  
          webpage = self._download_webpage(url, video_id)
  
-        files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage))
+        files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))
  
          quality = qualities(['SD', 'HD'])
          formats = []
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py

index 357594a11debd4e4946e6fd29b0f2b4d4fb241b9..a97995a6dfd92383c2d25ea7250142404176ecad 100644 (file)
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -5,12 +5,13 @@ from .common import InfoExtractor
  from ..compat import compat_urllib_parse
  from ..utils import (
      ExtractorError,
+    NO_DEFAULT,
      sanitized_Request,
  )
  
  
  class VodlockerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
+    _VALID_URL = r'https?://(?:www\.)?vodlocker\.(?:com|city)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
  
      _TESTS = [{
          'url': 'http://vodlocker.com/e8wvyzz4sl42',
@@ -43,16 +44,31 @@ class VodlockerIE(InfoExtractor):
              webpage = self._download_webpage(
                  req, video_id, 'Downloading video page')
  
+        def extract_file_url(html, default=NO_DEFAULT):
+            return self._search_regex(
+                r'file:\s*"(http[^\"]+)",', html, 'file url', default=default)
+
+        video_url = extract_file_url(webpage, default=None)
+
+        if not video_url:
+            embed_url = self._search_regex(
+                r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?vodlocker\.(?:com|city)/embed-.+?)\1',
+                webpage, 'embed url', group='url')
+            embed_webpage = self._download_webpage(
+                embed_url, video_id, 'Downloading embed webpage')
+            video_url = extract_file_url(embed_webpage)
+            thumbnail_webpage = embed_webpage
+        else:
+            thumbnail_webpage = webpage
+
          title = self._search_regex(
              r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
          thumbnail = self._search_regex(
-            r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
-        url = self._search_regex(
-            r'file:\s*"(http[^\"]+)",', webpage, 'file url')
+            r'image:\s*"(http[^\"]+)",', thumbnail_webpage, 'thumbnail', fatal=False)
  
          formats = [{
              'format_id': 'sd',
-            'url': url,
+            'url': video_url,
          }]
  
          return {
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 7030903c0aa4f7fc17c8f185fe31c6c1d3b0e450..4d433b6678842a59b34563491e3c552c607b28b1 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
  from __future__ import unicode_literals
  
-__version__ = '2016.01.09'
+__version__ = '2016.01.14'
author	Sergey M <dstftw@gmail.com>
	Thu, 14 Jan 2016 17:20:02 +0000 (22:20 +0500)
committer	Sergey M <dstftw@gmail.com>
	Thu, 14 Jan 2016 17:20:02 +0000 (22:20 +0500)
AUTHORS		patch | blob | history
docs/supportedsites.md		patch | blob | history
youtube_dl/extractor/__init__.py		patch | blob | history
youtube_dl/extractor/beeg.py		patch | blob | history
youtube_dl/extractor/canvas.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/ntvde.py		patch | blob | history
youtube_dl/extractor/orf.py		patch | blob | history
youtube_dl/extractor/prosiebensat1.py		patch | blob | history
youtube_dl/extractor/unistra.py		patch | blob | history
youtube_dl/extractor/vodlocker.py		patch | blob | history
youtube_dl/version.py		patch | blob | history