[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / pornotube.py
1 from __future__ import unicode_literals
2
3 import json
4
5 from .common import InfoExtractor
6 from ..utils import int_or_none
7
8
class PornotubeIE(InfoExtractor):
    """Extractor for clips on pornotube.com.

    The numeric clip id is taken from the page URL. The media URL and the
    metadata are then requested from the api.aebn.net JSON endpoints, using
    a token fetched from the authentication endpoint first.
    """

    _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com/(?:[^?#]*?)/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.pornotube.com/orientation/straight/video/4964/title/weird-hot-and-wet-science',
        'md5': '60fc5a4f0d93a97968fc7999d98260c9',
        'info_dict': {
            'id': '4964',
            'ext': 'mp4',
            'upload_date': '20141203',
            'title': 'Weird Hot and Wet Science',
            'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0',
            'categories': ['Adult Humor', 'Blondes'],
            'uploader': 'Alpha Blue Archives',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1417582800,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the clip referenced by ``url``.

        Performs three JSON requests: one for the auth token, one for the
        delivery (media URL) record and one for the clip metadata.

        Mandatory values ('tokenKey', 'mediaUrl', 'title') are read with
        plain subscripting, so a response lacking them raises ``KeyError``.
        Every other field is optional and degrades to ``None`` (or an empty
        list for categories) instead of aborting the extraction.
        """
        video_id = self._match_id(url)

        token = self._download_json(
            'https://api.aebn.net/auth/v2/origins/authenticate',
            video_id, note='Downloading token',
            data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
                'Origin': 'http://www.pornotube.com',
            })['tokenKey']

        video_url = self._download_json(
            'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id,
            video_id, note='Downloading delivery information',
            headers={'Authorization': token})['mediaUrl']

        # Only the fields consumed below are requested from the content API.
        FIELDS = (
            'title', 'description', 'startSecond', 'endSecond', 'publishDate',
            'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber'
        )

        info = self._download_json(
            'https://api.aebn.net/content/v2/clips/%s?fields=%s'
            % (video_id, ','.join(FIELDS)), video_id,
            note='Downloading metadata',
            headers={'Authorization': token})

        # The endpoint may answer with a list of clips; use the first entry.
        if isinstance(info, list):
            info = info[0]

        title = info['title']

        # publishDate is divided by 1000 to obtain the timestamp
        # (presumably milliseconds -> seconds; matches the test timestamp).
        timestamp = int_or_none(info.get('publishDate'), scale=1000)

        # 'studios' may be missing, null or an empty list; none of these
        # should break extraction of an optional field.
        uploader = None
        studios = info.get('studios')
        if isinstance(studios, list) and studios and isinstance(studios[0], dict):
            uploader = studios[0].get('name')

        movie_id = info.get('movieId')
        # Coerce to int so that '%08d' cannot fail on a numeric string.
        primary_image_number = int_or_none(info.get('primaryImageNumber'))
        thumbnail = None
        if movie_id and primary_image_number:
            thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % (
                movie_id, movie_id, primary_image_number)

        start = int_or_none(info.get('startSecond'))
        end = int_or_none(info.get('endSecond'))
        # Compare against None rather than truthiness: a clip that starts at
        # second 0 still has a well-defined duration.
        duration = None
        if start is not None and end is not None:
            duration = end - start

        # 'categories' may be null; skip entries that are not dicts or that
        # carry no name.
        categories = [
            c['name'] for c in (info.get('categories') or [])
            if isinstance(c, dict) and c.get('name')]

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': info.get('description'),
            'duration': duration,
            'timestamp': timestamp,
            'uploader': uploader,
            'thumbnail': thumbnail,
            'categories': categories,
            'age_limit': 18,
        }