git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/collegerama.py
[pornhub] Extract categories and tags (closes #10499)
[youtube-dl] / youtube_dl / extractor / collegerama.py
1 from __future__ import unicode_literals
2
3 import json
4
5 from .common import InfoExtractor
6 from ..utils import (
7     float_or_none,
8     int_or_none,
9     sanitized_Request,
10 )
11
12
class CollegeRamaIE(InfoExtractor):
    """Extractor for presentations served from collegerama.tudelft.nl.

    The presentation metadata and stream URLs are read from the site's
    ``GetPlayerOptions`` JSON service, which is queried with a POST request
    carrying the presentation id taken from the page URL.
    """

    _VALID_URL = r'https?://collegerama\.tudelft\.nl/Mediasite/Play/(?P<id>[\da-f]+)'
    _TESTS = [
        {
            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
            'md5': '481fda1c11f67588c0d9d8fbdced4e39',
            'info_dict': {
                'id': '585a43626e544bdd97aeb71a0ec907a01d',
                'ext': 'mp4',
                'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
                'description': '',
                # Raw string: '\.' is not a valid escape in a plain literal
                # and triggers a deprecation warning on recent Pythons.
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 7713.088,
                'timestamp': 1413309600,
                'upload_date': '20141014',
            },
        },
        {
            'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
            'md5': 'ef1fdded95bdf19b12c5999949419c92',
            'info_dict': {
                'id': '86a9ea9f53e149079fbdb4202b521ed21d',
                'ext': 'wmv',
                'title': '64ste Vakantiecursus: Afvalwater',
                'description': 'md5:7fd774865cc69d972f542b157c328305',
                'duration': 10853,
                'timestamp': 1326446400,
                'upload_date': '20120113',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the presentation addressed by ``url``.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``duration``, ``timestamp`` and ``formats``.

        Raises ``KeyError`` if the service response lacks one of the
        mandatory fields (``d``/``Presentation``, ``Title``, ``Streams``,
        ``VideoUrls``, ``MediaType``, ``Location``).
        """
        video_id = self._match_id(url)

        player_options_request = {
            'getPlayerOptionsRequest': {
                'ResourceId': video_id,
                'QueryString': '',
            }
        }

        # The POST body has to be bytes: Python 3's urllib refuses a text
        # payload with a TypeError.  json.dumps() emits ASCII-only output by
        # default, so the encode is a no-op in effect on Python 2.
        request = sanitized_Request(
            'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
            json.dumps(player_options_request).encode('utf-8'))
        request.add_header('Content-Type', 'application/json')

        player_options = self._download_json(request, video_id)

        presentation = player_options['d']['Presentation']
        title = presentation['Title']
        description = presentation.get('Description')
        thumbnail = None
        # NOTE(review): the second argument is presumably a scale divisor
        # (service values in milliseconds -> seconds); confirm in ..utils.
        duration = float_or_none(presentation.get('Duration'), 1000)
        timestamp = int_or_none(presentation.get('UnixTime'), 1000)

        formats = []
        for stream in presentation['Streams']:
            video_urls = stream['VideoUrls']

            # The thumbnail is a per-stream value, so look it up once per
            # stream rather than once per video URL.  As before, only streams
            # that actually carry video URLs contribute a thumbnail, and a
            # later stream overrides an earlier one.
            thumbnail_url = stream.get('ThumbnailUrl')
            if thumbnail_url and video_urls:
                thumbnail = 'http://collegerama.tudelft.nl' + thumbnail_url

            for video in video_urls:
                format_id = video['MediaType']
                # 'SS' entries are skipped (presumably Smooth Streaming
                # manifests that cannot be downloaded directly -- confirm).
                if format_id == 'SS':
                    continue
                formats.append({
                    'url': video['Location'],
                    'format_id': format_id,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }