git.bitcoin.ninja Git - youtube-dl/commitdiff
[tv2] Add extractor (#5724)
author: Sergey M․ <dstftw@gmail.com>
Sat, 16 May 2015 21:01:52 +0000 (03:01 +0600)
committer: Sergey M․ <dstftw@gmail.com>
Sat, 16 May 2015 21:01:52 +0000 (03:01 +0600)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/tv2.py [new file with mode: 0644]

index 6b19eb6f822f2ef4cacf012b06998e281c2d08c4..fb4f63ca3aef64530329b386307e962944f989ae 100644 (file)
@@ -572,6 +572,7 @@ from .tumblr import TumblrIE
 from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
 from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
+from .tv2 import TV2IE
 from .tv4 import TV4IE
 from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
 from .tv4 import TV4IE
 from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py
new file mode 100644 (file)
index 0000000..2dcc0e9
--- /dev/null
@@ -0,0 +1,111 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    float_or_none,
+    parse_iso8601,
+)
+
+
+class TV2IE(InfoExtractor):
+    """Information extractor for tv2.no video pages (``tv2.no/v/<id>``)."""
+
+    # Raw string: ``\.`` and ``\d`` are regex escapes, not string escapes.
+    _VALID_URL = r'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.tv2.no/v/916509/',
+        'md5': '9cb9e3410b18b515d71892f27856e9b1',
+        'info_dict': {
+            'id': '916509',
+            'ext': 'flv',
+            'title': 'Se Gryttens hyllest av Steven Gerrard',
+            'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
+            'timestamp': 1431715610,
+            'upload_date': '20150515',
+            'duration': 156.967,
+            'view_count': int,
+            'categories': list,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video that *url* points to.
+
+        Formats are gathered from the ``play.json`` endpoint, queried once
+        per protocol (HDS, then HLS); metadata comes from the asset JSON.
+        """
+        video_id = self._match_id(url)
+
+        formats = []
+        # URLs already accepted; an item whose URL is seen again is skipped.
+        format_urls = []
+        for protocol in ('HDS', 'HLS'):
+            data = self._download_json(
+                'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol),
+                video_id, 'Downloading play JSON')['playback']
+            for item in data['items']['item']:
+                video_url = item.get('url')
+                if not video_url or video_url in format_urls:
+                    continue
+                format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
+                if not self._is_valid_url(video_url, video_id, format_id):
+                    continue
+                format_urls.append(video_url)
+                ext = determine_ext(video_url)
+                if ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        video_url, video_id, f4m_id=format_id))
+                elif ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', m3u8_id=format_id))
+                elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
+                    # ``.ism`` manifests are recognised but intentionally
+                    # produce no format entry.
+                    pass
+                else:
+                    formats.append({
+                        'url': video_url,
+                        'format_id': format_id,
+                        'tbr': int_or_none(item.get('bitrate')),
+                        'filesize': int_or_none(item.get('fileSize')),
+                    })
+        self._sort_formats(formats)
+
+        asset = self._download_json(
+            'http://sumo.tv2.no/api/web/asset/%s.json' % video_id,
+            video_id, 'Downloading metadata JSON')['asset']
+
+        title = asset['title']
+        description = asset.get('description')
+        timestamp = parse_iso8601(asset.get('createTime'))
+        duration = float_or_none(asset.get('accurateDuration') or asset.get('duration'))
+        view_count = int_or_none(asset.get('views'))
+        # ``or ''`` also covers an explicit null; empty entries are dropped.
+        categories = [
+            category for category in (asset.get('keywords') or '').split(',')
+            if category]
+
+        # Entries without a URL are useless as thumbnails, so skip them.
+        thumbnails = [{
+            'id': thumbnail.get('@type'),
+            'url': thumbnail.get('url'),
+        } for thumbnail in (asset.get('imageVersions') or {}).values()
+            if thumbnail.get('url')]
+
+        # No top-level 'url': the downloadable URLs live in 'formats', and the
+        # loop variable ``video_url`` only held whichever item came last.
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'duration': duration,
+            'view_count': view_count,
+            'categories': categories,
+            'formats': formats,
+        }