[tvc] Add extractor (Closes #5795)
author Hannu Lintala <hannu.lintala@gmail.com>
Sun, 7 Jun 2015 14:25:30 +0000 (17:25 +0300)
committer Hannu Lintala <hannu.lintala@gmail.com>
Thu, 11 Jun 2015 22:34:10 +0000 (01:34 +0300)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/tvc.py [new file with mode: 0644]

index 67eb960575a9495e8499506b9cb6ef6b32d55eb6..8c4e1290415ecbac4551470f1e3b9c9c9bfb994e 100644 (file)
@@ -582,6 +582,7 @@ from .tv2 import (
     TV2ArticleIE,
 )
 from .tv4 import TV4IE
+from .tvc import TVCIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py
new file mode 100644 (file)
index 0000000..b62ab85
--- /dev/null
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    str_or_none,
+)
+
+
+class TVCIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?tvc\.ru/.*/show/.*id/(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/',
+            'md5': 'aa6fb3cf384e18a0ad3b30ee2898beba',
+            'info_dict': {
+                'id': '74622',
+                'display_id': '39702',
+                'ext': 'mp4',
+                'title': 'События. "События". Эфир от 22.05.2015 14:30',
+                'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 1122,
+            },
+        },
+        {
+            'url': 'http://www.tvc.ru/news/show/id/69944',
+            'md5': 'b173128ee7b88b5b06c84e5f7880909f',
+            'info_dict': {
+                'id': '75399',
+                'display_id': '69944',
+                'ext': 'mp4',
+                'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках',
+                'description': 'md5:f675c8eaf23aab9df542d31773ed6518',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 278,
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_url = self._og_search_video_url(webpage)
+
+        video_id = self._search_regex(
+            r'video/iframe/id/(\d+)/', video_url, 'video id')
+
+        video_json_url = 'http://www.tvc.ru/video/json/id/%s' % (video_id)
+
+        video_json = self._download_json(video_json_url, video_id)
+
+        formats = []
+        for info in video_json.get('path', {}).get('quality', []):
+            format_id = self._search_regex(
+                r'cdnvideo/([^-]+)-[^/]+/', info.get('url'), 'format id',
+                fatal=False)
+            formats.append({
+                'format_id': str_or_none(format_id),
+                'url': info.get('url'),
+                'width': int_or_none(info.get('width')),
+                'height': int_or_none(info.get('height')),
+                'tbr': int_or_none(info.get('bitrate')),
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'duration': int_or_none(video_json.get('duration')),
+            'formats': formats,
+        }