X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ffirsttv.py;h=6b662cc3cd78e4acf661af473f2374b5ec2af05c;hp=08ceee4ed7d5e8b96b81e7d8b9b823a5ea18e120;hb=dcdb292fddc82ae11f4c0b647815a45c88a6b6d5;hpb=5f0d813d9395848e92a1c6d83335360652d654c1 diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 08ceee4ed..6b662cc3c 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -1,60 +1,93 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import int_or_none +from ..compat import compat_urlparse +from ..utils import ( + int_or_none, + qualities, + unified_strdate, +) class FirstTVIE(InfoExtractor): - IE_NAME = 'firsttv' - IE_DESC = 'Видеоархив - Первый канал' - _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P\d+)' + IE_NAME = '1tv' + IE_DESC = 'Первый канал' + _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P[^/?#]+)' - _TEST = { - 'url': 'http://www.1tv.ru/videoarchive/73390', - 'md5': '3de6390cf0cca4a5eae1d1d83895e5ad', + _TESTS = [{ + # single format + 'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015', + 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', + 'info_dict': { + 'id': '40049', + 'ext': 'mp4', + 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', + 'description': 'md5:36a39c1d19618fec57d12efe212a8370', + 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', + 'upload_date': '20150212', + 'duration': 2694, + }, + }, { + # multiple formats + 'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016', 'info_dict': { - 'id': '73390', + 'id': '364746', 'ext': 'mp4', - 'title': 'Олимпийские канатные дороги', - 'description': 'md5:cc730d2bf4215463e37fff6a1e277b13', - 'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG', - 'duration': 149, + 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', + 'description': 'md5:a242eea0031fd180a4497d52640a9572', + 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', + 'upload_date': '20160407', + 'duration': 179, + 'formats': 'mincount:3', }, - 'skip': 'Only works from Russia', - } + 'params': { + 'skip_download': True, + }, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + display_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, 'Downloading page') + webpage = self._download_webpage(url, display_id) + playlist_url = compat_urlparse.urljoin(url, self._search_regex( + r'data-playlist-url="([^"]+)', webpage, 'playlist url')) - video_url = self._html_search_regex( - r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL') + item = self._download_json(playlist_url, display_id)[0] + video_id = item['id'] + quality = qualities(('ld', 'sd', 'hd', )) + formats = [] + for f in item.get('mbr', []): + src = f.get('src') + if not src: + continue + fname = f.get('name') + formats.append({ + 'url': src, + 'format_id': fname, + 'quality': quality(fname), + }) + self._sort_formats(formats) title = self._html_search_regex( - r'
\s*

([^<]*)', webpage, 'title') + (r'
\s*

([^<]*)', + r"'title'\s*:\s*'([^']+)'"), + webpage, 'title', default=None) or item['title'] description = self._html_search_regex( - r'
\s*
 
\s*

([^<]*)

', webpage, 'description', fatal=False) - - thumbnail = self._og_search_thumbnail(webpage) - duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False) - - like_count = self._html_search_regex(r'title="Понравилось".*?/> \[(\d+)\]', - webpage, 'like count', fatal=False) - dislike_count = self._html_search_regex(r'title="Не понравилось".*?/> \[(\d+)\]', - webpage, 'dislike count', fatal=False) + r'
\s*
 
\s*

([^<]*)

', + webpage, 'description', default=None) or self._html_search_meta( + 'description', webpage, 'description') + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'video duration', fatal=False)) + upload_date = unified_strdate(self._html_search_meta( + 'ya:ovs:upload_date', webpage, 'upload date', fatal=False)) return { 'id': video_id, - 'url': video_url, - 'thumbnail': thumbnail, + 'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage), 'title': title, 'description': description, + 'upload_date': upload_date, 'duration': int_or_none(duration), - 'like_count': int_or_none(like_count), - 'dislike_count': int_or_none(dislike_count), + 'formats': formats }