Merge remote-tracking branch 'adammw/adultswim'
[youtube-dl] / youtube_dl / extractor / goshgay.py
1 # -*- coding: utf-8 -*-
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     compat_urlparse,
9     str_to_int,
10     ExtractorError,
11 )
12 import json
13
14
class GoshgayIE(InfoExtractor):
    """Extractor for single video pages on www.goshgay.com.

    The video page embeds a jwplayer setup call whose settings object
    carries the player dimensions and the URI of an XML config document.
    That XML document in turn names the media file and an optional
    preview image.
    """

    # The dots of the host name are escaped so that they only match a
    # literal '.', not an arbitrary character.
    _VALID_URL = r'^(?:https?://)www\.goshgay\.com/video(?P<id>\d+?)($|/)'
    _TEST = {
        'url': 'http://www.goshgay.com/video4116282',
        'md5': '268b9f3c3229105c57859e166dd72b03',
        'info_dict': {
            'id': '4116282',
            'ext': 'flv',
            'title': 'md5:089833a4790b5e103285a07337f245bf',
            # Raw string: the backslash-dot is meant for the regex engine
            # and is not a valid Python string escape.
            'thumbnail': r're:http://.*\.jpg',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Returns a dict with the keys 'id', 'url', 'title', 'width',
        'height', 'thumbnail', 'http_referer' and 'age_limit'.

        Raises ExtractorError when the player settings carry no config
        URI, when the downloaded XML is missing or its root element is
        not <config>, or when the XML does not contain exactly one
        <file> element.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._search_regex(
            r'class="video-title"><h1>(.+?)<', webpage, 'title')

        player_config = self._search_regex(
            r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)',
            webpage, 'config settings')
        # The settings object is written with single-quoted strings;
        # swapping the quote characters turns it into text json.loads
        # accepts. NOTE(review): this breaks if a value itself contains a
        # quote character -- confirm against real pages.
        player_vars = json.loads(player_config.replace("'", '"'))
        # NOTE(review): str_to_int is defined in ..utils; this presumably
        # relies on width/height being strings (or absent) -- confirm.
        width = str_to_int(player_vars.get('width'))
        height = str_to_int(player_vars.get('height'))

        config_uri = player_vars.get('config')
        if config_uri is None:
            raise ExtractorError('Missing config URI')

        node = self._download_xml(
            config_uri, video_id, 'Downloading player config XML',
            errnote='Unable to download XML')
        # Defensive check, kept from the original: whether _download_xml
        # can return None is decided in InfoExtractor, not visible here.
        if node is None:
            raise ExtractorError('Missing config XML')
        if node.tag != 'config':
            raise ExtractorError('Missing config attribute')

        fns = node.findall('file')
        # Zero or several <file> elements are both rejected.
        if len(fns) != 1:
            raise ExtractorError('Missing media URI')
        video_url = fns[0].text

        # The preview image is optional; the first <image> wins.
        imgs = node.findall('image')
        thumbnail = imgs[0].text if imgs else None

        # Referer is the page URL reduced to scheme, host and path
        # (params, query string and fragment are dropped).
        url_comp = compat_urlparse.urlparse(url)
        ref = '%s://%s%s' % (
            url_comp.scheme, url_comp.netloc, url_comp.path)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'width': width,
            'height': height,
            'thumbnail': thumbnail,
            'http_referer': ref,
            'age_limit': 18,
        }