# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    mimetype2ext,
)


class TweakersIE(InfoExtractor):
    """Extractor for videos published under ``tweakers.net/video/<id>``."""

    # Only the numeric video id is captured; whatever follows it in the URL
    # (slug, ``.html`` suffix, ...) is not needed for extraction.
    _VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
        'md5': 'fe73e417c093a788e0160c4025f88b15',
        'info_dict': {
            'id': '9926',
            'ext': 'mp4',
            'title': 'New Nintendo 3DS XL - Op alle fronten beter',
            'description': 'md5:3789b21fed9c0219e9bcaacd43fab280',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'duration': 386,
            'uploader_id': 's7JeEm',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single tweakers.net video.

        The video id is taken from ``url`` and used to request the
        ``s1playlist`` JSON document; the first entry of its ``items`` list
        supplies the metadata and the progressive download locations.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``duration``, ``uploader_id`` and ``formats``.

        Raises ``KeyError``/``IndexError`` when the playlist has no
        ``items`` entry or the entry has no ``title``, in addition to
        whatever the inherited download helper raises on failure.
        """
        video_id = self._match_id(url)
        video_data = self._download_json(
            'https://tweakers.net/video/s1playlist/%s/1920/1080/playlist.json' % video_id,
            video_id)['items'][0]

        # The title is mandatory, hence the hard lookup instead of .get().
        title = video_data['title']

        formats = []
        # ``or`` fallbacks (rather than .get() defaults) also cover keys that
        # are present but set to null in the JSON.
        locations = video_data.get('locations') or {}
        for location in locations.get('progressive') or []:
            # Label and dimensions are shared by every source of a location.
            format_id = location.get('label')
            width = int_or_none(location.get('width'))
            height = int_or_none(location.get('height'))
            for source in location.get('sources') or []:
                source_url = source.get('src')
                if not source_url:
                    # A source without a URL cannot be downloaded.
                    continue
                # Prefer the declared MIME type; fall back to the URL suffix.
                ext = mimetype2ext(source.get('type')) or determine_ext(source_url)
                formats.append({
                    'format_id': format_id,
                    'url': source_url,
                    'width': width,
                    'height': height,
                    'ext': ext,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('poster'),
            'duration': int_or_none(video_data.get('duration')),
            'uploader_id': video_data.get('account'),
            'formats': formats,
        }