Merge branch 'Phaeilo-presstv'

author Yen Chi Hsuan <yan12125@gmail.com>

Mon, 11 Apr 2016 08:17:05 +0000 (16:17 +0800)

committer Yen Chi Hsuan <yan12125@gmail.com>

Mon, 11 Apr 2016 08:17:05 +0000 (16:17 +0800)
author Yen Chi Hsuan <yan12125@gmail.com>
Mon, 11 Apr 2016 08:17:05 +0000 (16:17 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Mon, 11 Apr 2016 08:17:05 +0000 (16:17 +0800)
diff --git a/youtube_dl/extractor/cliprs.py b/youtube_dl/extractor/cliprs.py

new file mode 100644 (file)

index 0000000..4f9320e
--- /dev/null
+++ b/youtube_dl/extractor/cliprs.py
@@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class ClipRsIE(InfoExtractor):
+    """Extractor for video pages on clip.rs."""
+
+    _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
+    _TEST = {
+        'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
+        'md5': 'c412d57815ba07b56f9edc7b5d6a14e5',
+        'info_dict': {
+            'id': '1488842.1399140381',
+            'ext': 'mp4',
+            'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli',
+            'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026',
+            'duration': 229,
+            'timestamp': 1459850243,
+            'upload_date': '20160405',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the clip.rs page at *url*.
+
+        The page embeds an ``mvp:<id>`` marker; that id is passed to the
+        ``get_asset_detail`` JSON-RPC method on qi.ckm.onetapi.pl, and the
+        formats and metadata are read from entry '0' of its result.
+
+        Raises ExtractorError if the API reports an error or the result has
+        no usable entry '0'.
+        """
+        # The URL slug is used only while fetching the page; the id that is
+        # reported is the mvp id found in the page markup.
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
+
+        response = self._download_json(
+            'http://qi.ckm.onetapi.pl/', video_id,
+            query={
+                'body[id]': video_id,
+                'body[jsonrpc]': '2.0',
+                'body[method]': 'get_asset_detail',
+                'body[params][ID_Publikacji]': video_id,
+                'body[params][Service]': 'www.onet.pl',
+                'content-type': 'application/jsonp',
+                'x-onet-app': 'player.front.onetapi.pl',
+            })
+
+        error = response.get('error')
+        if error:
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, error['message']), expected=True)
+
+        # Fail with a clear message rather than a TypeError/KeyError when the
+        # response carries no entry '0'.
+        video = (response.get('result') or {}).get('0')
+        if not isinstance(video, dict):
+            raise ExtractorError('%s returned no video data' % self.IE_NAME)
+
+        # 'formats' is walked as {<unused key>: {format_id: [variant, ...]}};
+        # anything of another shape is skipped.
+        format_groups = video.get('formats')
+        if not isinstance(format_groups, dict):
+            format_groups = {}
+
+        formats = []
+        for formats_dict in format_groups.values():
+            if not isinstance(formats_dict, dict):
+                continue
+            for format_id, format_list in formats_dict.items():
+                if not isinstance(format_list, list):
+                    continue
+                for f in format_list:
+                    if not f.get('url'):
+                        continue
+                    formats.append({
+                        'url': f['url'],
+                        'format_id': format_id,
+                        'height': int_or_none(f.get('vertical_resolution')),
+                        'width': int_or_none(f.get('horizontal_resolution')),
+                        'abr': float_or_none(f.get('audio_bitrate')),
+                        'vbr': float_or_none(f.get('video_bitrate')),
+                    })
+        self._sort_formats(formats)
+
+        # `or {}` also covers an explicit null 'meta' value.
+        meta = video.get('meta') or {}
+
+        title = self._og_search_title(webpage, default=None) or meta['title']
+        description = self._og_search_description(webpage, default=None) or meta.get('description')
+        # NOTE(review): 'lenght' looks like a misspelled key the API may use;
+        # kept as a fallback. Coerce so a numeric string still yields an int.
+        duration = int_or_none(meta.get('length') or meta.get('lenght'))
+        timestamp = parse_iso8601(meta.get('addDate'), ' ')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 17d00721caad1a82f93a82458d0efdd40697abcd..5269059d0d9e080a2e3650f7ce30a8e8d3422d96 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -376,7 +376,6 @@ class InfoExtractor(object):
                  self.to_screen('%s' % (note,))
              else:
                  self.to_screen('%s: %s' % (video_id, note))
-        # data, headers and query params will be ignored for `Request` objects
          if isinstance(url_or_request, compat_urllib_request.Request):
              url_or_request = update_Request(
                  url_or_request, data=data, headers=headers, query=query)
diff --git a/youtube_dl/extractor/ebaumsworld.py b/youtube_dl/extractor/ebaumsworld.py

index b6bfd2b2dedc5388ef383a3cd8853bbb0c541f68..c97682cd367edebfd9fc6a476ad073cb03240054 100644 (file)
--- a/youtube_dl/extractor/ebaumsworld.py
+++ b/youtube_dl/extractor/ebaumsworld.py
@@ -4,10 +4,10 @@ from .common import InfoExtractor
  
  
  class EbaumsWorldIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ebaumsworld\.com/videos/[^/]+/(?P<id>\d+)'
  
      _TEST = {
-        'url': 'http://www.ebaumsworld.com/video/watch/83367677/',
+        'url': 'http://www.ebaumsworld.com/videos/a-giant-python-opens-the-door/83367677/',
          'info_dict': {
              'id': '83367677',
              'ext': 'mp4',
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py

index c2fa83918f1b86a98692cf8f26298eb478610d41..c234ff12732f21ce84bb4f595404e797971fff79 100644 (file)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -123,6 +123,7 @@ from .chirbit import (
  )
  from .cinchcast import CinchcastIE
  from .cinemassacre import CinemassacreIE
+from .cliprs import ClipRsIE
  from .clipfish import ClipfishIE
  from .cliphunter import CliphunterIE
  from .clipsyndicate import ClipsyndicateIE
diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py

index 9561ed5fbaa25404654303956a676b000da2af67..62ff84835c87b28d18ace1afa5eee19f894d198d 100644 (file)
--- a/youtube_dl/extractor/glide.py
+++ b/youtube_dl/extractor/glide.py
@@ -2,6 +2,7 @@
  from __future__ import unicode_literals
  
  from .common import InfoExtractor
+from ..utils import unified_strdate
  
  
  class GlideIE(InfoExtractor):
@@ -15,26 +16,38 @@ class GlideIE(InfoExtractor):
              'ext': 'mp4',
              'title': 'Damon Timm\'s Glide message',
              'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
+            'uploader': 'Damon Timm',
+            'upload_date': '20140919',
          }
      }
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
+
          webpage = self._download_webpage(url, video_id)
+
          title = self._html_search_regex(
-            r'<title>(.*?)</title>', webpage, 'title')
-        video_url = self.http_scheme() + self._search_regex(
-            r'<source src="(.*?)" type="video/mp4">', webpage, 'video URL')
-        thumbnail_url = self._search_regex(
-            r'<img id="video-thumbnail" src="(.*?)"',
-            webpage, 'thumbnail url', fatal=False)
-        thumbnail = (
-            thumbnail_url if thumbnail_url is None
-            else self.http_scheme() + thumbnail_url)
+            r'<title>(.+?)</title>', webpage, 'title')
+        video_url = self._proto_relative_url(self._search_regex(
+            r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
+            webpage, 'video URL', default=None,
+            group='url')) or self._og_search_video_url(webpage)
+        thumbnail = self._proto_relative_url(self._search_regex(
+            r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',
+            webpage, 'thumbnail url', default=None,
+            group='url')) or self._og_search_thumbnail(webpage)
+        uploader = self._search_regex(
+            r'<div[^>]+class=["\']info-name["\'][^>]*>([^<]+)',
+            webpage, 'uploader', fatal=False)
+        upload_date = unified_strdate(self._search_regex(
+            r'<div[^>]+class="info-date"[^>]*>([^<]+)',
+            webpage, 'upload date', fatal=False))
  
          return {
              'id': video_id,
              'title': title,
              'url': video_url,
              'thumbnail': thumbnail,
+            'uploader': uploader,
+            'upload_date': upload_date,
          }
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py

index 6770685d7027c3738fba35f3e057f6be2a3a512c..8a5e562dbc24fac4d18498e631e8f5e10d8fe038 100644 (file)
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -4,16 +4,15 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    float_or_none,
+    int_or_none,
+)
  
  
  class JWPlatformBaseIE(InfoExtractor):
      def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True):
          video_data = jwplayer_data['playlist'][0]
-        subtitles = {}
-        for track in video_data['tracks']:
-            if track['kind'] == 'captions':
-                subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
  
          formats = []
          for source in video_data['sources']:
@@ -35,12 +34,22 @@ class JWPlatformBaseIE(InfoExtractor):
                  })
          self._sort_formats(formats)
  
+        subtitles = {}
+        tracks = video_data.get('tracks')
+        if tracks and isinstance(tracks, list):
+            for track in tracks:
+                if track.get('file') and track.get('kind') == 'captions':
+                    subtitles.setdefault(track.get('label') or 'en', []).append({
+                        'url': self._proto_relative_url(track['file'])
+                    })
+
          return {
              'id': video_id,
              'title': video_data['title'] if require_title else video_data.get('title'),
              'description': video_data.get('description'),
              'thumbnail': self._proto_relative_url(video_data.get('image')),
              'timestamp': int_or_none(video_data.get('pubdate')),
+            'duration': float_or_none(jwplayer_data.get('duration')),
              'subtitles': subtitles,
              'formats': formats,
          }
diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py

index 05337421ca4210af5a9a797f22c112bb663a0960..7a88a42cd84dbfd9f343567dffb5f462c10329b7 100644 (file)
--- a/youtube_dl/extractor/screencastomatic.py
+++ b/youtube_dl/extractor/screencastomatic.py
@@ -1,15 +1,11 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
-    ExtractorError,
-    js_to_json,
-)
+from .jwplatform import JWPlatformBaseIE
+from ..utils import js_to_json
  
  
-class ScreencastOMaticIE(InfoExtractor):
+class ScreencastOMaticIE(JWPlatformBaseIE):
      _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
      _TEST = {
          'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
@@ -20,6 +16,7 @@ class ScreencastOMaticIE(InfoExtractor):
              'title': 'Welcome to 3-4 Philosophy @ DECV!',
              'thumbnail': 're:^https?://.*\.jpg$',
              'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
+            'duration': 369.163,
          }
      }
  
@@ -27,23 +24,14 @@ class ScreencastOMaticIE(InfoExtractor):
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)
  
-        setup_js = self._search_regex(
-            r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);",
-            webpage, 'setup code')
-        data = self._parse_json(setup_js, video_id, transform_source=js_to_json)
-        try:
-            video_data = next(
-                m for m in data['modes'] if m.get('type') == 'html5')
-        except StopIteration:
-            raise ExtractorError('Could not find any video entries!')
-        video_url = compat_urlparse.urljoin(url, video_data['config']['file'])
-        thumbnail = data.get('image')
+        jwplayer_data = self._parse_json(
+            self._search_regex(
+                r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'),
+            video_id, transform_source=js_to_json)
  
-        return {
-            'id': video_id,
+        info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
+        info_dict.update({
              'title': self._og_search_title(webpage),
              'description': self._og_search_description(webpage),
-            'url': video_url,
-            'ext': 'mp4',
-            'thumbnail': thumbnail,
-        }
+        })
+        return info_dict
diff --git a/youtube_dl/extractor/telebruxelles.py b/youtube_dl/extractor/telebruxelles.py

index a3d05f97d681b6cb4da6adf179a4f0a5744e5123..eefecc490c5d13476259497e79f7a3ebe68caee7 100644 (file)
--- a/youtube_dl/extractor/telebruxelles.py
+++ b/youtube_dl/extractor/telebruxelles.py
@@ -1,11 +1,13 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
  
  
  class TeleBruxellesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
      _TESTS = [{
          'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
          'md5': '59439e568c9ee42fb77588b2096b214f',
@@ -39,18 +41,18 @@ class TeleBruxellesIE(InfoExtractor):
          webpage = self._download_webpage(url, display_id)
  
          article_id = self._html_search_regex(
-            r"<article id=\"post-(\d+)\"", webpage, 'article ID')
+            r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None)
          title = self._html_search_regex(
              r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
-        description = self._og_search_description(webpage)
+        description = self._og_search_description(webpage, default=None)
  
          rtmp_url = self._html_search_regex(
-            r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"",
+            r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
              webpage, 'RTMP url')
-        rtmp_url = rtmp_url.replace("\" + \"", "")
+        rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
  
          return {
-            'id': article_id,
+            'id': article_id or display_id,
              'display_id': display_id,
              'title': title,
              'description': description,
author	Yen Chi Hsuan <yan12125@gmail.com>
	Mon, 11 Apr 2016 08:17:05 +0000 (16:17 +0800)
committer	Yen Chi Hsuan <yan12125@gmail.com>
	Mon, 11 Apr 2016 08:17:05 +0000 (16:17 +0800)
youtube_dl/extractor/cliprs.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/common.py		patch | blob | history
youtube_dl/extractor/ebaumsworld.py		patch | blob | history
youtube_dl/extractor/extractors.py		patch | blob | history
youtube_dl/extractor/glide.py		patch | blob | history
youtube_dl/extractor/jwplatform.py		patch | blob | history
youtube_dl/extractor/screencastomatic.py		patch | blob | history
youtube_dl/extractor/telebruxelles.py		patch | blob | history