]> git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/tweakers.py
[pornhub] Extract metadata from JSON-LD (closes #26614)
[youtube-dl] / youtube_dl / extractor / tweakers.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import (
5     int_or_none,
6     determine_ext,
7     mimetype2ext,
8 )
9
10
class TweakersIE(InfoExtractor):
    """Extractor for tweakers.net video pages.

    Handles URLs of the form ``http(s)://tweakers.net/video/<numeric id>``
    (see ``_VALID_URL``); anything following the numeric id is ignored.
    """

    _VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
        'md5': 'fe73e417c093a788e0160c4025f88b15',
        'info_dict': {
            'id': '9926',
            'ext': 'mp4',
            'title': 'New Nintendo 3DS XL - Op alle fronten beter',
            'description': 'md5:3789b21fed9c0219e9bcaacd43fab280',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'duration': 386,
            'uploader_id': 's7JeEm',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single tweakers.net video.

        Downloads the playlist JSON for the video id found in ``url`` and
        reads the first entry of its ``items`` list.  ``title`` is treated
        as mandatory (a missing key raises); every other field is optional
        and is passed through as ``None`` when absent.

        Returns a dict with ``id``, ``title``, ``description``,
        ``thumbnail``, ``duration``, ``uploader_id`` and ``formats``.
        """
        video_id = self._match_id(url)
        # NOTE(review): the 1920/1080 path segments look like a requested
        # player size -- confirm against the site before changing them.
        video_data = self._download_json(
            'https://tweakers.net/video/s1playlist/%s/1920/1080/playlist.json' % video_id,
            video_id)['items'][0]

        # Mandatory field: fail loudly if the playlist entry has no title.
        title = video_data['title']

        # Optional containers are read with `or {}` / `or []` rather than a
        # .get() default: the default only applies when the key is missing,
        # whereas a JSON null would come back as None and break the
        # chained lookup / iteration below.
        locations = video_data.get('locations') or {}
        formats = []
        for location in locations.get('progressive') or []:
            # Label and dimensions are declared once per location and are
            # shared by every source listed under it.
            format_id = location.get('label')
            width = int_or_none(location.get('width'))
            height = int_or_none(location.get('height'))
            for source in location.get('sources') or []:
                source_url = source.get('src')
                if not source_url:
                    # A source without a URL cannot be downloaded; skip it.
                    continue
                # Prefer the declared MIME type; fall back to the extension
                # derived from the URL when the type yields nothing.
                ext = mimetype2ext(source.get('type')) or determine_ext(source_url)
                formats.append({
                    'format_id': format_id,
                    'url': source_url,
                    'width': width,
                    'height': height,
                    'ext': ext,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('poster'),
            'duration': int_or_none(video_data.get('duration')),
            'uploader_id': video_data.get('account'),
            'formats': formats,
        }