[youtube] fix hd720 format position
[youtube-dl] / youtube_dl / extractor / joj.py
1 # coding: utf-8\r
2 from __future__ import unicode_literals\r
3 \r
4 import re\r
5 \r
6 from .common import InfoExtractor\r
7 from ..compat import compat_str\r
8 from ..utils import (\r
9     int_or_none,\r
10     js_to_json,\r
11     try_get,\r
12 )\r
13 \r
14 \r
class JojIE(InfoExtractor):
    """Extractor for videos served by the media.joj.sk embed player.

    Accepts either a full ``https://media.joj.sk/embed/<uuid>`` URL or the
    short ``joj:<uuid>`` form; in both cases the UUID is used as the video id.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        joj:|
                        https?://media\.joj\.sk/embed/
                    )
                    (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
                '''
    _TESTS = [{
        'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
        'info_dict': {
            'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
            'ext': 'mp4',
            'title': 'NOVÉ BÝVANIE',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 3118,
        }
    }, {
        'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the media.joj.sk embed URLs found in ``<iframe src=...>`` tags.

        The result is a list of strings (possibly empty); URLs may be
        protocol-relative (``//media.joj.sk/...``) since the scheme is optional
        in the pattern.
        """
        return re.findall(
            r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
            webpage)

    def _real_extract(self, url):
        """Download the embed page for ``url`` and build the info dict.

        Formats are taken from the ``bitrates`` JavaScript object on the embed
        page when it lists mp4 URLs; otherwise they are read from the
        ``Video.php`` XML playlist service.

        Returns a dict with ``id``, ``title``, ``thumbnail``, ``duration`` and
        ``formats``.
        """
        video_id = self._match_id(url)

        # Always fetch the canonical embed page, so the ``joj:<id>`` short form
        # works the same way as a full embed URL.
        webpage = self._download_webpage(
            'https://media.joj.sk/embed/%s' % video_id, video_id)

        # Prefer the player's videoTitle, then the <title> tag, and only then
        # fall back to the OpenGraph title.
        title = self._search_regex(
            (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'<title>(?P<title>[^<]+)'), webpage, 'title',
            default=None, group='title') or self._og_search_title(webpage)

        # A missing ``bitrates`` object yields '{}', and parsing is non-fatal,
        # so ``bitrates`` may be an empty dict or a falsy value here.
        bitrates = self._parse_json(
            self._search_regex(
                r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
                default='{}'),
            video_id, transform_source=js_to_json, fatal=False)

        formats = []
        for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
            if isinstance(format_url, compat_str):
                height = self._search_regex(
                    r'(\d+)[pP]\.', format_url, 'height', default=None)
                formats.append({
                    'url': format_url,
                    'format_id': '%sp' % height if height else None,
                    # ``height`` is None when the URL has no "<N>p." marker;
                    # int_or_none keeps the format instead of raising
                    # TypeError the way a bare int(None) would.
                    'height': int_or_none(height),
                })
        if not formats:
            # Fallback: no usable mp4 list on the page, query the XML playlist.
            playlist = self._download_xml(
                'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
                video_id)
            for file_el in playlist.findall('./files/file'):
                path = file_el.get('path')
                if not path:
                    continue
                format_id = file_el.get('id') or file_el.get('label')
                formats.append({
                    # Only the first 'dat/' occurrence is dropped from the
                    # path before it is appended to the storage host.
                    'url': 'http://n16.joj.sk/storage/%s' % path.replace(
                        'dat/', '', 1),
                    'format_id': format_id,
                    'height': int_or_none(self._search_regex(
                        r'(\d+)[pP]', format_id or path, 'height',
                        default=None)),
                })
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)

        duration = int_or_none(self._search_regex(
            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }