[fivemin] Get the 'sid' from the embed page (fixes #2745)
[youtube-dl] / youtube_dl / extractor / fivemin.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     compat_str,
8     compat_urllib_parse,
9 )
10
11
class FiveMinIE(InfoExtractor):
    """Extractor for 5min videos.

    Accepts either a ``PlayerSeed.js`` URL on a 5min.com host that carries a
    ``playList=<id>`` query parameter, or the short ``5min:<id>`` form.
    """

    IE_NAME = '5min'
    _VALID_URL = r'''(?x)
        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(.*?&)?playList=|
            5min:)
        (?P<id>\d+)
        '''

    _TESTS = [
        {
            # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
            'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
            'md5': '4f7b0b79bf1a470e5004f7112385941d',
            'info_dict': {
                'id': '518013791',
                'ext': 'mp4',
                'title': 'iPad Mini with Retina Display Review',
            },
        },
        {
            # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
            'url': '5min:518086247',
            'md5': 'e539a9dd682c288ef5a498898009f69e',
            'info_dict': {
                'id': '518086247',
                'ext': 'mp4',
                'title': 'How to Make a Next-Level Fruit Salad',
            },
        },
    ]

    @classmethod
    def _build_result(cls, video_id):
        """Return a url_result for ``5min:<video_id>`` bound to this extractor's key."""
        short_url = '5min:%s' % video_id
        return cls.url_result(short_url, cls.ie_key())

    def _real_extract(self, url):
        """Build the info dict (id, title, formats) for the video named in *url*.

        The 'sid' value is read from the embed page and then passed to the
        SenseHandler JSON endpoint, whose first 'binding' entry supplies the
        title and the list of available renditions.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # The embed page is fetched only to discover the 'sid' value.
        embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
        embed_page = self._download_webpage(
            embed_url, video_id, 'Downloading embed page')
        sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')

        handler_params = {
            'func': 'GetResults',
            'playlist': video_id,
            'sid': sid,
            'isPlayerSeed': 'true',
            'url': embed_url,
        }
        handler_url = (
            'https://syn.5min.com/handlers/SenseHandler.ashx?'
            + compat_urllib_parse.urlencode(handler_params))
        info = self._download_json(handler_url, video_id)['binding'][0]

        # The media path uses the id with its last two digits dropped, plus one.
        second_id = compat_str(int(video_id[:-2]) + 1)
        media_url_template = 'http://avideos.5min.com/%s/%s/%s_%s.mp4'
        # (rendition ID, height) pairs; a format is emitted only when the
        # rendition ID is listed in the response.
        known_renditions = ((1, 320), (2, 480), (4, 720), (8, 1080))
        formats = [
            {
                'format_id': compat_str(quality),
                'url': media_url_template % (
                    second_id[-3:], second_id, video_id, quality),
                'height': height,
            }
            for quality, height in known_renditions
            if any(r['ID'] == quality for r in info['Renditions'])
        ]

        return {
            'id': video_id,
            'title': info['Title'],
            'formats': formats,
        }