[vshare] Capture and output error message
[youtube-dl] / youtube_dl / extractor / vshare.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_chr
8 from ..utils import (
9     decode_packed_codes,
10     ExtractorError,
11 )
12
13
class VShareIE(InfoExtractor):
    """Information extractor for videos hosted on vshare.io.

    Matches both the ``/d/<id>`` and ``/v/<id>`` URL forms.  The media
    URLs are not present in the plain page markup: they are recovered from
    a packed JavaScript snippet embedded in the page (see
    ``_extract_packed``).
    """

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    def _extract_packed(self, webpage):
        """Decode the obfuscated markup hidden in the page's packed script.

        The ``eval(function...`` snippet found in *webpage* is passed to
        ``decode_packed_codes``.  The result is expected to contain a
        bracketed list of integers and a ``fromCharCode(...)`` call whose
        trailing number is a fixed offset; subtracting that offset from
        every integer yields the character codes of the hidden text.

        Returns the decoded text as a single string.  The underlying
        ``_search_regex`` calls are made without ``default``/``fatal``
        overrides, so a missing snippet, list or offset is reported by
        them.
        """
        packed = self._search_regex(
            r'(eval\(function.+)', webpage, 'packed code')
        unpacked = decode_packed_codes(packed)

        digits = self._search_regex(
            r'\[((?:\d+,?)+)\]', unpacked, 'digits')
        # The pattern above also accepts a trailing comma ("1,2,"), which
        # leaves an empty piece after splitting; skip it rather than crash
        # in int('').
        codes = [int(digit) for digit in digits.split(',') if digit]

        # Convert the offset once instead of once per character.
        key = int(self._search_regex(
            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit'))

        return ''.join(compat_chr(code - key) for code in codes)

    def _real_extract(self, url):
        """Extract the id, title and format URLs for a vshare.io video.

        Raises ExtractorError (with ``expected=True``) carrying the site's
        own message when the page contains an ``xxx-error`` block.
        """
        video_id = self._match_id(url)

        # Whatever URL form was given, the fixed-size /v/ page is the one
        # that gets downloaded and parsed.
        webpage = self._download_webpage(
            'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id)

        # Look for the site's error banner before any other (fatal)
        # extraction so that the site's message is what gets reported,
        # not an unrelated "unable to extract" failure.
        error = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
            'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        title = self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title')
        # Keep only the part of the page title before the first ' - '.
        title = title.split(' - ')[0]

        unpacked = self._extract_packed(webpage)
        video_urls = re.findall(r'<source src="([^"]+)', unpacked)
        formats = [{'url': video_url} for video_url in video_urls]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }

    @staticmethod
    def _extract_urls(webpage):
        """Return vshare.io embed URLs found in ``<iframe src=...>`` tags.

        Each returned URL ends right after the video id (any following
        path, query or fragment is dropped) and may be protocol-relative
        (``//vshare.io/v/...``) when the page wrote it that way.
        """
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
            webpage)