[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / moevideo.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     clean_html,
9     int_or_none,
10 )
11
12
class MoeVideoIE(InfoExtractor):
    """Extractor for the video hosts matched by ``_VALID_URL``.

    Handles ``/video/``, ``/framevideo/`` and ``/embed/`` URLs on
    moevideo.net, playreplay.net, videochart.net and thesame.tv.
    """

    IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
    # ``host`` keeps the optional ``www.`` prefix so that follow-up requests
    # go to the same host the user supplied; ``id`` is the ``<digits/letters>.<hash>``
    # identifier found after the path prefix.
    _VALID_URL = r'''(?x)
        https?://(?P<host>(?:www\.)?
        (?:(?:moevideo|playreplay|videochart)\.net|thesame\.tv))/
        (?:video|framevideo|embed)/(?P<id>[0-9a-z]+\.[0-9A-Za-z]+)'''
    # NOTE(review): neither constant below is referenced by the code in this
    # class; they are kept in case something outside this class reads them.
    _API_URL = 'http://api.letitbit.net/'
    _API_KEY = 'tVL0gjqo5'
    # Both test cases are marked as skipped because the videos were removed.
    _TESTS = [
        {
            'url': 'http://moevideo.net/video/00297.0036103fe3d513ef27915216fd29',
            'md5': '129f5ae1f6585d0e9bb4f38e774ffb3a',
            'info_dict': {
                'id': '00297.0036103fe3d513ef27915216fd29',
                'ext': 'flv',
                'title': 'Sink cut out machine',
                'description': 'md5:f29ff97b663aefa760bf7ca63c8ca8a8',
                'thumbnail': r're:^https?://.*\.jpg$',
                'width': 540,
                'height': 360,
                'duration': 179,
                'filesize': 17822500,
            },
            'skip': 'Video has been removed',
        },
        {
            'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
            'md5': '74f0a014d5b661f0f0e2361300d1620e',
            'info_dict': {
                'id': '77107.7f325710a627383d40540d8e991a',
                'ext': 'flv',
                'title': 'Operacion Condor.',
                'description': 'md5:7e68cb2fcda66833d5081c542491a9a3',
                'thumbnail': r're:^https?://.*\.jpg$',
                'width': 480,
                'height': 296,
                'duration': 6027,
                'filesize': 588257923,
            },
            'skip': 'Video has been removed',
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for a single video URL.

        Two pages are downloaded from the host found in ``url``: the
        ``/video/<id>`` page, which supplies the Open Graph metadata
        (title, description, thumbnail fallback, duration), and the
        ``/embed/<id>`` page, which supplies the player configuration
        holding the media URL.

        Returns a dict with the keys ``id``, ``title``, ``thumbnail``,
        ``description``, ``duration`` and ``url``.
        """
        host, video_id = re.match(self._VALID_URL, url).groups()

        # Always fetch the canonical /video/ page, whichever of the accepted
        # path prefixes the input URL used.
        webpage = self._download_webpage(
            'http://%s/video/%s' % (host, video_id),
            video_id, 'Downloading webpage')

        title = self._og_search_title(webpage)

        embed_webpage = self._download_webpage(
            'http://%s/embed/%s' % (host, video_id),
            video_id, 'Downloading embed webpage')
        # The player is initialised with an inline JSON object passed as the
        # second argument of mvplayer("#player", {...}); its 'video' entry
        # carries the poster and the media URL.
        video = self._parse_json(self._search_regex(
            r'mvplayer\("#player"\s*,\s*({.+})',
            embed_webpage, 'mvplayer'), video_id)['video']

        # Duration is optional metadata: look it up non-fatally so that a page
        # lacking the 'video:duration' Open Graph property yields
        # duration=None instead of aborting the whole extraction.
        duration = int_or_none(self._og_search_property(
            'video:duration', webpage, fatal=False))

        return {
            'id': video_id,
            'title': title,
            # Prefer the poster from the player config, fall back to og:image.
            'thumbnail': video.get('poster') or self._og_search_thumbnail(webpage),
            'description': clean_html(self._og_search_description(webpage)),
            'duration': duration,
            # The media URL is mandatory; a missing key raises KeyError.
            'url': video['ourUrl'],
        }