Merge pull request #6439 from remitamine/facebook
[youtube-dl] / youtube_dl / extractor / screenwavemedia.py
1 # encoding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     int_or_none,
7     unified_strdate,
8     js_to_json,
9 )
10
11
class ScreenwaveMediaIE(InfoExtractor):
    """Extractor for videos embedded through player.screenwavemedia.com."""

    # The id group stops at the first '&' or '#': query parameters or a
    # fragment that follow ``id=`` must not leak into the video id, because
    # the id is interpolated verbatim into the player request URL below.
    _VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>[^&#]+)'

    _TESTS = [{
        'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict (id, title, formats) for a player URL.

        The player page supplies the title and the video server address;
        the shared ``player.js`` supplies a ``sources`` template in which
        the server address and the video id are substituted before it is
        parsed as JSON.
        """
        video_id = self._match_id(url)

        playerdata = self._download_webpage(
            'http://player.screenwavemedia.com/player.php?id=%s' % video_id,
            video_id, 'Downloading player webpage')

        # The title is embedded with escaped slashes ("\/"); undo that.
        vidtitle = self._search_regex(
            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')

        playerconfig = self._download_webpage(
            'http://player.screenwavemedia.com/player.js',
            video_id, 'Downloading playerconfig webpage')

        # Matches a "[ipaddress] => 1.2.3.4" style entry in the player page.
        videoserver = self._search_regex(r"\[ipaddress\]\s*=>\s*([\d\.]+)", playerdata, 'videoserver')

        # player.js holds the sources as a JavaScript array whose URLs are
        # assembled by string concatenation; replace the concatenated
        # expressions with their concrete values so that the array can be
        # converted with js_to_json and parsed.
        sources_template = self._search_regex(
            r"sources\s*:\s*(\[[^\]]+?\])", playerconfig,
            'sources',
        ).replace(
            "' + thisObj.options.videoserver + '",
            videoserver
        ).replace(
            "' + playerVidId + '",
            video_id
        )
        sources = self._parse_json(js_to_json(sources_template), video_id)

        formats = []
        for source in sources:
            file_url = source.get('file')
            if not file_url:
                # An entry without a URL cannot be turned into a format;
                # skip it instead of aborting the extraction with KeyError.
                continue

            source_type = source.get('type')
            if source_type == 'hls':
                formats.extend(self._extract_m3u8_formats(file_url, video_id))
                continue

            format_label = source.get('label')
            # The label is optional; only search it when present, since the
            # regex helper expects a string to search in.
            height = None
            if format_label:
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]', format_label, 'height', default=None))
            formats.append({
                'url': file_url,
                'format': format_label,
                'ext': source_type,
                'height': height,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': vidtitle,
            'formats': formats,
        }
73
74
class TeamFourIE(InfoExtractor):
    """Extractor for teamfourstar.com video pages.

    Collects the metadata shown on the page and returns a
    ``url_transparent`` result that points at the embedded
    screenwavemedia player URL.
    """

    _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
    _TEST = {
        'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
        'info_dict': {
            'id': 'TeamFourStar-5292a02f20bfa',
            'ext': 'mp4',
            'upload_date': '20130401',
            'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
            'title': 'A Moment With TFS Episode 4',
        }
    }

    def _real_extract(self, url):
        """Scrape title, date, description and thumbnail from the page.

        The player URL and the title are looked up with the default
        (fatal) behaviour; date and description are looked up with
        ``fatal=False``.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # URL of the embedded screenwavemedia player (taken from a src="").
        embed_url = self._search_regex(
            r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
            page, 'player data URL')

        title = self._html_search_regex(
            r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
            page, 'title')

        # Look the date text up first, then normalise it in a second step.
        raw_date = self._html_search_regex(
            r'<div class="heroheadingdate">(?P<date>.+?)</div>',
            page, 'date', fatal=False)
        upload_date = unified_strdate(raw_date)

        description = self._html_search_regex(
            r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
            page, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(page)

        info = {
            '_type': 'url_transparent',
            'display_id': slug,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            'url': embed_url,
        }
        return info