[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / giga.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import itertools
5
6 from .common import InfoExtractor
7 from ..utils import (
8     qualities,
9     compat_str,
10     parse_duration,
11     parse_iso8601,
12     str_to_int,
13 )
14
15
class GigaIE(InfoExtractor):
    """Extractor for video pages on giga.de."""

    _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
        'md5': '6bc5535e945e724640664632055a584f',
        'info_dict': {
            'id': '2622086',
            'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
            'ext': 'mp4',
            'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss',
            'description': 'md5:afdf5862241aded4718a30dff6a57baf',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 578,
            'timestamp': 1414749706,
            'upload_date': '20141031',
            'uploader': 'Robin Schweiger',
            'view_count': int,
        },
    }, {
        'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
        'only_matching': True,
    }, {
        'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
        'only_matching': True,
    }, {
        'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video embedded in the page at *url*.

        The numeric video id is scraped from the page and used to query the
        syndication playlist API; the numbered entries of the first playlist
        item become the formats.  All remaining metadata is scraped from the
        page markup.  Only the video id and the title are mandatory lookups;
        the other scraped fields are searched with ``fatal=False``.
        """
        # The last path component of the URL doubles as the display id.
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        video_id = self._search_regex(
            [r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
            page, 'video id')

        playlist_url = (
            'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
            % video_id)
        # Only the first element of the JSON response is used.
        playlist = self._download_json(playlist_url, video_id)[0]

        rank = qualities(['normal', 'hd720'])

        # Sources are keyed by consecutive stringified integers ("0", "1",
        # ...); iteration stops at the first missing or empty entry.
        sources = itertools.takewhile(
            bool,
            (playlist.get(compat_str(index)) for index in itertools.count()))
        formats = [{
            'url': source['src'],
            'format_id': '%s-%s' % (
                source['quality'], source['type'].rsplit('/', 1)[-1]),
            'quality': rank(source['quality']),
        } for source in sources]
        self._sort_formats(formats)

        title = self._html_search_meta('title', page, 'title', fatal=True)
        description = self._html_search_meta(
            'description', page, 'description')
        thumbnail = self._og_search_thumbnail(page)

        # The duration is read from the <span class="duration"> that follows
        # the element referencing this particular video id.
        duration_pattern = r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id)
        duration_text = self._search_regex(
            duration_pattern, page, 'duration', fatal=False)
        duration = parse_duration(duration_text)

        upload_text = self._search_regex(
            r'datetime="([^"]+)"', page, 'upload date', fatal=False)
        timestamp = parse_iso8601(upload_text)

        uploader = self._search_regex(
            r'class="author">([^<]+)</a>', page, 'uploader', fatal=False)

        views_text = self._search_regex(
            r'<span class="views"><strong>([\d.,]+)</strong>',
            page, 'view count', fatal=False)
        view_count = str_to_int(views_text)

        info = {
            'id': video_id,
            'display_id': slug,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'uploader': uploader,
            'view_count': view_count,
            'formats': formats,
        }
        return info