X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fodnoklassniki.py;h=854b6800c18bd47d2ed1b40cace0969a22eebc75;hb=939be9adfe810ada7dbd5e9032bcfec19fafa14b;hp=f9e064a60e445668200b759ca4e0ad1a6f7c28ab;hpb=10e6ed93417fb51c606af2e3e47b9a5a094dd6ee;p=youtube-dl diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index f9e064a60..854b6800c 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -2,7 +2,12 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from ..compat import ( + compat_etree_fromstring, + compat_parse_qs, + compat_urllib_parse_unquote, + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, unified_strdate, @@ -32,8 +37,8 @@ class OdnoklassnikiIE(InfoExtractor): 'skip': 'Video has been blocked', }, { # metadataUrl - 'url': 'http://ok.ru/video/63567059965189-0', - 'md5': '9676cf86eff5391d35dea675d224e131', + 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5', + 'md5': '6ff470ea2dd51d5d18c295a355b0b6bc', 'info_dict': { 'id': '63567059965189-0', 'ext': 'mp4', @@ -44,11 +49,12 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader': '☭ Андрей Мещанинов ☭', 'like_count': int, 'age_limit': 0, + 'start_time': 5, }, }, { # YouTube embed (metadataUrl, provider == USER_YOUTUBE) 'url': 'http://ok.ru/video/64211978996595-1', - 'md5': '5d7475d428845cd2e13bae6f1a992278', + 'md5': '2f206894ffb5dbfcce2c5a14b909eea5', 'info_dict': { 'id': '64211978996595-1', 'ext': 'mp4', @@ -56,10 +62,27 @@ class OdnoklassnikiIE(InfoExtractor): 'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0', 'duration': 440, 'upload_date': '20150826', - 'uploader_id': '750099571', - 'uploader': 'Алина П', + 'uploader_id': 'tvroscosmos', + 'uploader': 'Телестудия Роскосмоса', 'age_limit': 0, }, + }, { + # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field) + 'url': 'http://ok.ru/video/62036049272859-0', + 'info_dict': { + 'id': '62036049272859-0', + 'ext': 'mp4', + 'title': 'МУЗЫКА ДОЖДЯ .', + 'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0', + 'upload_date': '20120106', + 'uploader_id': '473534735899', + 'uploader': 'МARINA D', + 'age_limit': 0, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Video has not been found', }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'only_matching': True, @@ -78,6 +101,9 @@ class OdnoklassnikiIE(InfoExtractor): }] def _real_extract(self, url): + start_time = int_or_none(compat_parse_qs( + compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0]) + video_id = self._match_id(url) webpage = self._download_webpage( @@ -106,7 +132,14 @@ class OdnoklassnikiIE(InfoExtractor): video_id, 'Downloading metadata JSON') movie = metadata['movie'] - title = movie['title'] + + # Some embedded videos may not contain title in movie dict (e.g. + # http://ok.ru/video/62036049272859-0) thus we allow missing title + # here and it's going to be extracted later by an extractor that + # will process the actual embed. + provider = metadata.get('provider') + title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title') + thumbnail = movie.get('poster') duration = int_or_none(movie.get('duration')) @@ -135,23 +168,42 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader_id': uploader_id, 'like_count': like_count, 'age_limit': age_limit, + 'start_time': start_time, } - if metadata.get('provider') == 'USER_YOUTUBE': + if provider == 'USER_YOUTUBE': info.update({ '_type': 'url_transparent', 'url': movie['contentId'], }) return info - quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd')) + quality = qualities(('4', '0', '1', '2', '3', '5')) formats = [{ 'url': f['url'], 'ext': 'mp4', 'format_id': f['name'], - 'quality': quality(f['name']), } for f in metadata['videos']] + + m3u8_url = metadata.get('hlsManifestUrl') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + + dash_manifest = metadata.get('metadataEmbedded') + if dash_manifest: + formats.extend(self._parse_mpd_formats( + compat_etree_fromstring(dash_manifest), 'mpd')) + + for fmt in formats: + fmt_type = self._search_regex( + r'\btype[/=](\d)', fmt['url'], + 'format type', default=None) + if fmt_type: + fmt['quality'] = quality(fmt_type) + self._sort_formats(formats) info['formats'] = formats