[ae] Improve _VALID_URL
[youtube-dl] / youtube_dl / extractor / ae.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import smuggle_url
5
6
class AEIE(InfoExtractor):
    """Extractor for video pages on history.com, aetv.com, mylifetime.com and fyi.tv.

    The page is downloaded, a media release URL is located in its markup, and
    that URL is handed off through ``url_result`` with signing data smuggled
    into it.  The test cases below list ``ThePlatform`` as the additional
    extractor involved in handling the result.
    """

    # The directory segments and the id deliberately exclude '?' and '#':
    # otherwise a '/' inside the query string or fragment would be consumed
    # as part of the path and the id would be taken from the query instead
    # of from the last path component.
    _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:[^/?#]+/)+(?P<id>[^/?#]+)(?:$|[?#])'

    _TESTS = [{
        'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
        'info_dict': {
            'id': 'g12m5Gyt3fdR',
            'ext': 'mp4',
            'title': "Bet You Didn't Know: Valentine's Day",
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
        'info_dict': {
            'id': 'eg47EERs_JsZ',
            'ext': 'mp4',
            'title': "Winter Is Coming",
            'description': 'md5:a40e370925074260b1c8a633c632c63a',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
        'only_matching': True
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
        'only_matching': True
    }, {
        'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Resolve a page URL to a delegated result for its media release URL.

        The id captured by ``_VALID_URL`` (the last path component) is used
        both as the download label and to find the matching ``data-href``
        element in the page.  If no such element is found, a generic
        ``media_url = '...'`` assignment in the page is used instead.

        NOTE(review): ``_search_regex`` is defined on the base class and is
        not visible here; it presumably raises when none of the patterns
        match -- confirm against ``InfoExtractor``.
        """
        # Imported locally so this block does not depend on a change to the
        # module-level import list.
        import re

        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_url_re = [
            # The id comes from the URL, so it must be escaped before being
            # embedded in a pattern: characters such as '.', '+' or '(' would
            # otherwise be interpreted as regex syntax.
            r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % re.escape(video_id),
            # Fallback: a media_url assignment anywhere in the page.
            r"media_url\s*=\s*'([^']+)'"
        ]
        video_url = self._search_regex(video_url_re, webpage, 'video url')

        # The 'sig' payload is attached to the URL for whichever extractor
        # handles the result (presumably ThePlatform, per 'add_ie' above --
        # confirm how it consumes the key/secret pair).
        return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))