[vice] Do not capture unused groups in _VALID_URL
[youtube-dl] / youtube_dl / extractor / vice.py
1 from __future__ import unicode_literals
2 import re
3
4 from .common import InfoExtractor
5 from .ooyala import OoyalaIE
6 from ..utils import ExtractorError
7
8
class ViceIE(InfoExtractor):
    """Extractor for vice.com pages that embed an Ooyala video.

    The page is downloaded and searched for an Ooyala embed code; the
    resulting Ooyala URL is then handed over to the Ooyala extractor.
    """

    # The optional subdomain part is limited to characters that cannot
    # terminate the host ('/', '?', '#'), so the pattern only matches when
    # vice.com really is the host and not when ".vice.com/" merely shows up
    # in the path, query or fragment of a URL on some other site.
    _VALID_URL = r'https?://(?:[^/?#]+\.)?vice\.com/.*?/(?P<name>.+)'

    _TESTS = [
        {
            'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
            'info_dict': {
                'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
                'ext': 'mp4',
                'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
            },
            'params': {
                # Requires ffmpeg (m3u8 manifest)
                'skip_download': True,
            },
        }, {
            'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
            'info_dict': {
                'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
                'ext': 'mp4',
                'title': 'VICE News - Inside the Monkey Lab',
                'description': 'md5:1f660d467d3515f29d11e5ef742a4b82',
            },
            'params': {
                # Requires ffmpeg (m3u8 manifest)
                'skip_download': True,
            },
        }
    ]

    def _real_extract(self, url):
        """Resolve a vice.com page URL to the Ooyala video it embeds.

        The ``name`` group of ``_VALID_URL`` is used as the identifier when
        downloading the page. Raises an expected ``ExtractorError`` when no
        Ooyala embed code can be obtained from the page.
        """
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
        try:
            embed_code = self._search_regex(
                r'embedCode=([^&\'"]+)', webpage,
                'ooyala embed code')
            ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
        except ExtractorError:
            # A missing embed code is reported as an expected condition
            # (page without a video) rather than as an extractor failure.
            raise ExtractorError('The page doesn\'t contain a video', expected=True)
        # Delegate the actual extraction to the Ooyala extractor.
        return self.url_result(ooyala_url, ie='Ooyala')