[tv2] Add extractor (#5724)

author Sergey M․ <dstftw@gmail.com>

Sat, 16 May 2015 21:01:52 +0000 (03:01 +0600)

committer Sergey M․ <dstftw@gmail.com>

Sat, 16 May 2015 21:01:52 +0000 (03:01 +0600)
author Sergey M․ <dstftw@gmail.com>
Sat, 16 May 2015 21:01:52 +0000 (03:01 +0600)
committer Sergey M․ <dstftw@gmail.com>
Sat, 16 May 2015 21:01:52 +0000 (03:01 +0600)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 6b19eb6f822f2ef4cacf012b06998e281c2d08c4..fb4f63ca3aef64530329b386307e962944f989ae 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -572,6 +572,7 @@ from .tumblr import TumblrIE
 from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
+from .tv2 import TV2IE
 from .tv4 import TV4IE
 from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py

new file mode 100644 (file)

index 0000000..2dcc0e9
--- /dev/null
+++ b/youtube_dl/extractor/tv2.py
@@ -0,0 +1,120 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    float_or_none,
+    parse_iso8601,
+)
+
+
+class TV2IE(InfoExtractor):
+    """Extractor for videos published at tv2.no/v/<id>."""
+
+    # Raw string: keeps the regex escapes (\. and \d) out of Python's own
+    # string-escape processing.
+    _VALID_URL = r'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.tv2.no/v/916509/',
+        'md5': '9cb9e3410b18b515d71892f27856e9b1',
+        'info_dict': {
+            'id': '916509',
+            'ext': 'flv',
+            'title': 'Se Gryttens hyllest av Steven Gerrard',
+            'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
+            'timestamp': 1431715610,
+            'upload_date': '20150515',
+            'duration': 156.967,
+            'view_count': int,
+            'categories': list,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the tv2.no video that `url` points to.
+
+        Formats come from the play API, which is queried once per protocol;
+        metadata comes from the asset API.
+        """
+        video_id = self._match_id(url)
+
+        formats = []
+        # Shared across both protocol requests so that each distinct URL is
+        # turned into formats only once.
+        format_urls = []
+        for protocol in ('HDS', 'HLS'):
+            data = self._download_json(
+                'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol),
+                video_id, 'Downloading play JSON')['playback']
+            for item in data['items']['item']:
+                video_url = item.get('url')
+                if not video_url or video_url in format_urls:
+                    continue
+                format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
+                if not self._is_valid_url(video_url, video_id, format_id):
+                    continue
+                format_urls.append(video_url)
+                ext = determine_ext(video_url)
+                if ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        video_url, video_id, f4m_id=format_id))
+                elif ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', m3u8_id=format_id))
+                elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
+                    # .ism manifests are matched explicitly so they do not reach
+                    # the generic branch below; no format is produced for them.
+                    pass
+                else:
+                    formats.append({
+                        'url': video_url,
+                        'format_id': format_id,
+                        'tbr': int_or_none(item.get('bitrate')),
+                        'filesize': int_or_none(item.get('fileSize')),
+                    })
+        self._sort_formats(formats)
+
+        asset = self._download_json(
+            'http://sumo.tv2.no/api/web/asset/%s.json' % video_id,
+            video_id, 'Downloading metadata JSON')['asset']
+
+        title = asset['title']
+        description = asset.get('description')
+        timestamp = parse_iso8601(asset.get('createTime'))
+        duration = float_or_none(asset.get('accurateDuration') or asset.get('duration'))
+        view_count = int_or_none(asset.get('views'))
+        # 'keywords' is split on commas; a missing or null value yields []
+        # rather than [''] or an AttributeError.
+        categories = [
+            keyword
+            for keyword in (asset.get('keywords') or '').split(',')
+            if keyword
+        ]
+
+        # Only the values of 'imageVersions' are used; entries without a URL
+        # are dropped, and a missing or null mapping yields no thumbnails.
+        thumbnails = [
+            {
+                'id': thumbnail.get('@type'),
+                'url': thumbnail.get('url'),
+            }
+            for thumbnail in (asset.get('imageVersions') or {}).values()
+            if thumbnail.get('url')
+        ]
+
+        # No top-level 'url': the media URLs are carried by 'formats', and
+        # `video_url` is a loop variable that is unbound when the play API
+        # returns no items.
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'duration': duration,
+            'view_count': view_count,
+            'categories': categories,
+            'formats': formats,
+        }
author	Sergey M․ <dstftw@gmail.com>
	Sat, 16 May 2015 21:01:52 +0000 (03:01 +0600)
committer	Sergey M․ <dstftw@gmail.com>
	Sat, 16 May 2015 21:01:52 +0000 (03:01 +0600)
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/tv2.py	[new file with mode: 0644]	patch \| blob