[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / internetvideoarchive.py
1 from __future__ import unicode_literals
2
3 import json
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import (
8     compat_parse_qs,
9     compat_urlparse,
10 )
11
12
class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for player URLs on video.internetvideoarchive.net.

    The ``publishedid`` and ``customerid`` query parameters of the player URL
    are sent as a JSON body to the site's settings endpoint; the JSON response
    supplies the title, the stream manifest URL, the poster image and the
    description.
    """

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
        'info_dict': {
            'id': '194487',
            'ext': 'mp4',
            'title': 'Kick-Ass 2',
            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    @staticmethod
    def _build_json_url(query):
        """Return the player configuration URL for the query string *query*.

        *query* is appended verbatim after the ``?``, so it must already be
        URL-encoded.
        """
        return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Raises KeyError when *url* carries no ``customerid`` parameter or
        the settings response lacks ``Title`` or ``VideoUrl``.
        """
        query = compat_parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query['publishedid'][0]
        data = self._download_json(
            'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
            video_id, data=json.dumps({
                'customerid': query['customerid'][0],
                'publishedid': video_id,
            }).encode())
        title = data['Title']
        video_url = data['VideoUrl']
        formats = self._extract_m3u8_formats(
            video_url, video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)
        # The HLS request is non-fatal and may yield no formats at all.
        # Indexing the empty list used to raise IndexError; fall back to the
        # manifest URL itself so the sibling manifests can still be tried.
        file_url = formats[0]['url'] if formats else video_url
        if '.ism/' in file_url:
            def replace_url(manifest_name):
                # Swap the path component after ``.ism/`` for another
                # manifest name, leaving any query string untouched.
                return re.sub(
                    r'\.ism/[^?]+', '.ism/' + manifest_name, file_url)

            formats.extend(self._extract_f4m_formats(
                replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
            formats.extend(self._extract_mpd_formats(
                replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
            formats.extend(self._extract_ism_formats(
                replace_url('Manifest'), video_id, ism_id='mss', fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': data.get('PosterUrl'),
            'description': data.get('Description'),
        }