From: Sergey M․ Date: Wed, 22 Jul 2015 18:12:46 +0000 (+0600) Subject: Merge branch 'patch/enhance-tagesschau-regex' of https://github.com/rohieb/youtube... X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=c356620ec14033969d53ca8a2dd432b79046ee20;hp=726adc43ec184b6d7e05cdbc197aa02c53b32347;p=youtube-dl Merge branch 'patch/enhance-tagesschau-regex' of https://github.com/rohieb/youtube-dl into rohieb-patch/enhance-tagesschau-regex --- diff --git a/AUTHORS b/AUTHORS index 531ec5767..e75e9885d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -132,3 +132,6 @@ George Brighton Remita Amine Aurélio A. Heckert Bernhard Minks +sceext +Zach Bruggeman +Tjark Saul diff --git a/README.md b/README.md index a2cc89cdb..ac54d7b67 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ which means you can modify it, redistribute it or use it however you like. ## Video Selection: --playlist-start NUMBER Playlist video to start at (default is 1) --playlist-end NUMBER Playlist video to end at (default is last) - --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" + --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13. --match-title REGEX Download only matching titles (regex or caseless sub-string) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a84878026..73445137f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,6 +28,7 @@ - **anitube.se** - **AnySex** - **Aparat** + - **AppleConnect** - **AppleDaily**: 臺灣蘋果日報 - **AppleTrailers** - **archive.org**: archive.org videos @@ -486,6 +487,7 @@ - **SportBox** - **SportBoxEmbed** - **SportDeutschland** + - **Sportschau** - **Srf** - **SRMediathek**: Saarländischer Rundfunk - **SSA** @@ -611,8 +613,8 @@ - **Vimple**: Vimple - one-click video hosting - **Vine** - **vine:user** - - **vk.com** - - **vk.com:user-videos**: vk.com:All of a user's videos + - **vk**: VK + - **vk:uservideos**: VK - User's Videos - **Vodlocker** - **VoiceRepublic** - **Vporn** diff --git a/test/test_utils.py b/test/test_utils.py index e13e11b59..65692a9fb 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -324,6 +324,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('02:03:04'), 7384) self.assertEqual(parse_duration('01:02:03:04'), 93784) self.assertEqual(parse_duration('1 hour 3 minutes'), 3780) + self.assertEqual(parse_duration('87 Min.'), 5220) def test_fix_xml_ampersands(self): self.assertEqual( diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 00af78e06..702a6ad50 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1104,7 +1104,8 @@ class YoutubeDL(object): if req_format is None: req_format_list = [] if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and - info_dict['extractor'] in ['youtube', 'ted']): + info_dict['extractor'] in ['youtube', 'ted'] and + not info_dict.get('is_live')): merger = FFmpegMergerPP(self) if merger.available and merger.can_merge(): req_format_list.append('bestvideo+bestaudio') diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index f110830c4..dccc59212 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -8,6 +8,7 @@ from .hls import NativeHlsFD from .http import HttpFD from .rtsp import RtspFD from .rtmp import RtmpFD +from .dash import DashSegmentsFD from ..utils import ( determine_protocol, @@ -20,6 +21,7 @@ PROTOCOL_MAP = { 'mms': RtspFD, 'rtsp': RtspFD, 'f4m': F4mFD, + 'http_dash_segments': DashSegmentsFD, } diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py new file mode 100644 index 000000000..a4685d307 --- /dev/null +++ b/youtube_dl/downloader/dash.py @@ -0,0 +1,66 @@ +from __future__ import unicode_literals + +import re + +from .common import FileDownloader +from ..compat import compat_urllib_request + + +class DashSegmentsFD(FileDownloader): + """ + Download segments in a DASH manifest + """ + def real_download(self, filename, info_dict): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + base_url = info_dict['url'] + segment_urls = info_dict['segment_urls'] + + is_test = self.params.get('test', False) + remaining_bytes = self._TEST_FILE_SIZE if is_test else None + byte_counter = 0 + + def append_url_to_file(outf, target_url, target_name, remaining_bytes=None): + self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name)) + req = compat_urllib_request.Request(target_url) + if remaining_bytes is not None: + req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) + + data = self.ydl.urlopen(req).read() + + if remaining_bytes is not None: + data = data[:remaining_bytes] + + outf.write(data) + return len(data) + + def combine_url(base_url, target_url): + if re.match(r'^https?://', target_url): + return target_url + return '%s/%s' % (base_url, target_url) + + with open(tmpfilename, 'wb') as outf: + append_url_to_file( + outf, combine_url(base_url, info_dict['initialization_url']), + 'initialization segment') + for i, segment_url in enumerate(segment_urls): + segment_len = append_url_to_file( + outf, combine_url(base_url, segment_url), + 'segment %d / %d' % (i + 1, len(segment_urls)), + remaining_bytes) + byte_counter += segment_len + if remaining_bytes is not None: + remaining_bytes -= segment_len + if remaining_bytes <= 0: + break + + self.try_rename(tmpfilename, filename) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': byte_counter, + 'filename': filename, + 'status': 'finished', + }) + + return True diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 06f21064b..3cfa804ec 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -19,9 +19,14 @@ from .anysex import AnySexIE from .aol import AolIE from .allocine import AllocineIE from .aparat import AparatIE +from .appleconnect import AppleConnectIE from .appletrailers import AppleTrailersIE from .archiveorg import ArchiveOrgIE -from .ard import ARDIE, ARDMediathekIE +from .ard import ( + ARDIE, + ARDMediathekIE, + SportschauIE, +) from .arte import ( ArteTvIE, ArteTVPlus7IE, @@ -270,6 +275,7 @@ from .kuwo import ( ) from .la7 import LA7IE from .laola1tv import Laola1TvIE +from .lecture2go import Lecture2GoIE from .letv import ( LetvIE, LetvTvIE, diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dl/extractor/appleconnect.py new file mode 100644 index 000000000..ea7a70393 --- /dev/null +++ b/youtube_dl/extractor/appleconnect.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + str_to_int, + ExtractorError +) + + +class AppleConnectIE(InfoExtractor): + _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P[\w-]+)' + _TEST = { + 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3', + 'md5': '10d0f2799111df4cb1c924520ca78f98', + 'info_dict': { + 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3', + 'ext': 'm4v', + 'title': 'Energy', + 'uploader': 'Drake', + 'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg', + 'upload_date': '20150710', + 'timestamp': 1436545535, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + try: + video_json = self._html_search_regex( + r'class="auc-video-data">(\{.*?\})', webpage, 'json') + except ExtractorError: + raise ExtractorError('This post doesn\'t contain a video', expected=True) + + video_data = self._parse_json(video_json, video_id) + timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp')) + like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count')) + + return { + 'id': video_id, + 'url': video_data['sslSrc'], + 'title': video_data['title'], + 'description': video_data['description'], + 'uploader': video_data['artistName'], + 'thumbnail': video_data['artworkUrl'], + 'timestamp': timestamp, + 'like_count': like_count, + } diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 6a35ea463..6f465789b 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -8,6 +8,7 @@ from .generic import GenericIE from ..utils import ( determine_ext, ExtractorError, + get_element_by_attribute, qualities, int_or_none, parse_duration, @@ -22,19 +23,125 @@ class ARDMediathekIE(InfoExtractor): _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' _TESTS = [{ - 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', - 'only_matching': True, + 'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114', + 'info_dict': { + 'id': '29582122', + 'ext': 'mp4', + 'title': 'Ich liebe das Leben trotzdem', + 'description': 'md5:45e4c225c72b27993314b31a84a5261c', + 'duration': 4557, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { - 'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916', + 'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916', + 'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e', 'info_dict': { - 'id': '22490580', + 'id': '29522730', 'ext': 'mp4', - 'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)', - 'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.', + 'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)', + 'description': 'md5:196392e79876d0ac94c94e8cdb2875f1', + 'duration': 5252, }, - 'skip': 'Blocked outside of Germany', + }, { + # audio + 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086', + 'md5': '219d94d8980b4f538c7fcb0865eb7f2c', + 'info_dict': { + 'id': '28488308', + 'ext': 'mp3', + 'title': 'Tod eines Fußballers', + 'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef', + 'duration': 3240, + }, + }, { + 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', + 'only_matching': True, }] + def _extract_media_info(self, media_info_url, webpage, video_id): + media_info = self._download_json( + media_info_url, video_id, 'Downloading media JSON') + + formats = self._extract_formats(media_info, video_id) + + if not formats: + if '"fsk"' in webpage: + raise ExtractorError( + 'This video is only available after 20:00', expected=True) + elif media_info.get('_geoblocked'): + raise ExtractorError('This video is not available due to geo restriction', expected=True) + + self._sort_formats(formats) + + duration = int_or_none(media_info.get('_duration')) + thumbnail = media_info.get('_previewImage') + + subtitles = {} + subtitle_url = media_info.get('_subtitleUrl') + if subtitle_url: + subtitles['de'] = [{ + 'ext': 'srt', + 'url': subtitle_url, + }] + + return { + 'id': video_id, + 'duration': duration, + 'thumbnail': thumbnail, + 'formats': formats, + 'subtitles': subtitles, + } + + def _extract_formats(self, media_info, video_id): + type_ = media_info.get('_type') + media_array = media_info.get('_mediaArray', []) + formats = [] + for num, media in enumerate(media_array): + for stream in media.get('_mediaStreamArray', []): + stream_urls = stream.get('_stream') + if not stream_urls: + continue + if not isinstance(stream_urls, list): + stream_urls = [stream_urls] + quality = stream.get('_quality') + server = stream.get('_server') + for stream_url in stream_urls: + ext = determine_ext(stream_url) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', + video_id, preference=-1, f4m_id='hds')) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + stream_url, video_id, 'mp4', preference=1, m3u8_id='hls')) + else: + if server and server.startswith('rtmp'): + f = { + 'url': server, + 'play_path': stream_url, + 'format_id': 'a%s-rtmp-%s' % (num, quality), + } + elif stream_url.startswith('http'): + f = { + 'url': stream_url, + 'format_id': 'a%s-%s-%s' % (num, ext, quality) + } + else: + continue + m = re.search(r'_(?P\d+)x(?P\d+)\.mp4$', stream_url) + if m: + f.update({ + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) + if type_ == 'audio': + f['vcodec'] = 'none' + formats.append(f) + return formats + def _real_extract(self, url): # determine video id from url m = re.match(self._VALID_URL, url) @@ -92,46 +199,22 @@ class ARDMediathekIE(InfoExtractor): 'format_id': fid, 'url': furl, }) + self._sort_formats(formats) + info = { + 'formats': formats, + } else: # request JSON file - media_info = self._download_json( - 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id) - # The second element of the _mediaArray contains the standard http urls - streams = media_info['_mediaArray'][1]['_mediaStreamArray'] - if not streams: - if '"fsk"' in webpage: - raise ExtractorError('This video is only available after 20:00') - - formats = [] - for s in streams: - if type(s['_stream']) == list: - for index, url in enumerate(s['_stream'][::-1]): - quality = s['_quality'] + index - formats.append({ - 'quality': quality, - 'url': url, - 'format_id': '%s-%s' % (determine_ext(url), quality) - }) - continue - - format = { - 'quality': s['_quality'], - 'url': s['_stream'], - } - - format['format_id'] = '%s-%s' % ( - determine_ext(format['url']), format['quality']) + info = self._extract_media_info( + 'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id) - formats.append(format) - - self._sort_formats(formats) - - return { + info.update({ 'id': video_id, 'title': title, 'description': description, - 'formats': formats, 'thumbnail': thumbnail, - } + }) + + return info class ARDIE(InfoExtractor): @@ -189,3 +272,41 @@ class ARDIE(InfoExtractor): 'upload_date': upload_date, 'thumbnail': thumbnail, } + + +class SportschauIE(ARDMediathekIE): + IE_NAME = 'Sportschau' + _VALID_URL = r'(?Phttps?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P[^/#?]+))\.html' + _TESTS = [{ + 'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html', + 'info_dict': { + 'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100', + 'ext': 'mp4', + 'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + base_url = mobj.group('baseurl') + + webpage = self._download_webpage(url, video_id) + title = get_element_by_attribute('class', 'headline', webpage) + description = self._html_search_meta('description', webpage, 'description') + + info = self._extract_media_info( + base_url + '-mc_defaultQuality-h.json', webpage, video_id) + + info.update({ + 'title': title, + 'description': description, + }) + + return info diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index d4f98ca16..75723c00d 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -6,15 +6,11 @@ import re import json from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, - compat_urlparse, -) +from ..compat import compat_urlparse from ..utils import ( clean_html, ExtractorError, int_or_none, - float_or_none, parse_duration, determine_ext, ) @@ -59,12 +55,12 @@ class FranceTVBaseInfoExtractor(InfoExtractor): # See https://github.com/rg3/youtube-dl/issues/3963 # m3u8 urls work fine continue - video_url_parsed = compat_urllib_parse_urlparse(video_url) f4m_url = self._download_webpage( - 'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url_parsed.path, + 'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url, video_id, 'Downloading f4m manifest token', fatal=False) if f4m_url: - formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id)) + formats.extend(self._extract_f4m_formats( + f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id)) elif video_url.startswith('rtmp'): @@ -165,8 +161,10 @@ class FranceTVIE(FranceTVBaseInfoExtractor): (?: (?:www\.)?france[2345o]\.fr/ (?: - emissions/[^/]+/(?:videos|diffusions)?| - videos + emissions/[^/]+/(?:videos|diffusions)| + emission/[^/]+| + videos| + jt ) /| embed\.francetv\.fr/\?ue= @@ -229,15 +227,16 @@ class FranceTVIE(FranceTVBaseInfoExtractor): }, # franceo { - 'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013', - 'md5': '52f0bfe202848b15915a2f39aaa8981b', + 'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015', + 'md5': '47d5816d3b24351cdce512ad7ab31da8', 'info_dict': { - 'id': '108634970', + 'id': '125377621', 'ext': 'flv', - 'title': 'Infô Afrique', - 'description': 'md5:ebf346da789428841bee0fd2a935ea55', - 'upload_date': '20140915', - 'timestamp': 1410822000, + 'title': 'Infô soir', + 'description': 'md5:01b8c6915a3d93d8bbbd692651714309', + 'upload_date': '20150718', + 'timestamp': 1437241200, + 'duration': 414, }, }, { @@ -247,7 +246,7 @@ class FranceTVIE(FranceTVBaseInfoExtractor): 'id': 'EV_30231', 'ext': 'flv', 'title': 'Alcaline, le concert avec Calogero', - 'description': 'md5:', + 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff', 'upload_date': '20150226', 'timestamp': 1424989860, 'duration': 5400, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index dc24a8a8b..cd133a10c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1196,6 +1196,12 @@ class GenericIE(InfoExtractor): if vimeo_url is not None: return self.url_result(vimeo_url) + vid_me_embed_url = self._search_regex( + r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', + webpage, 'vid.me embed', default=None) + if vid_me_embed_url is not None: + return self.url_result(vid_me_embed_url, 'Vidme') + # Look for embedded YouTube player matches = re.findall(r'''(?x) (?: diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index d93b0867d..afb7f4e61 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -81,6 +81,9 @@ class IqiyiIE(InfoExtractor): 'title': '名侦探柯南第752集', }, }], + 'params': { + 'skip_download': True, + }, }] _FORMATS_MAP = [ diff --git a/youtube_dl/extractor/lecture2go.py b/youtube_dl/extractor/lecture2go.py new file mode 100644 index 000000000..40a3d2346 --- /dev/null +++ b/youtube_dl/extractor/lecture2go.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + parse_duration, + int_or_none, +) + + +class Lecture2GoIE(InfoExtractor): + _VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P\d+)' + _TEST = { + 'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473', + 'md5': 'ac02b570883020d208d405d5a3fd2f7f', + 'info_dict': { + 'id': '17473', + 'ext': 'flv', + 'title': '2 - Endliche Automaten und reguläre Sprachen', + 'creator': 'Frank Heitmann', + 'duration': 5220, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r']+class="title">(.+)', webpage, 'title') + + formats = [] + for url in set(re.findall(r'"src","([^"]+)"', webpage)): + ext = determine_ext(url) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats(url, video_id)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats(url, video_id)) + else: + formats.append({ + 'url': url, + }) + + self._sort_formats(formats) + + creator = self._html_search_regex( + r']+id="description">([^<]+)', webpage, 'creator', fatal=False) + duration = parse_duration(self._html_search_regex( + r'Duration:\s*\s*]*>([^<]+)', webpage, 'duration', fatal=False)) + view_count = int_or_none(self._html_search_regex( + r'Views:\s*\s*]+>(\d+)', webpage, 'view count', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'creator': creator, + 'duration': duration, + 'view_count': view_count, + } diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index fec5d65ad..a53479aad 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -32,7 +32,7 @@ class PBSIE(InfoExtractor): 'info_dict': { 'id': '2365006249', 'ext': 'mp4', - 'title': 'A More Perfect Union', + 'title': 'Constitution USA with Peter Sagal - A More Perfect Union', 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a', 'duration': 3190, }, @@ -46,7 +46,7 @@ class PBSIE(InfoExtractor): 'info_dict': { 'id': '2365297690', 'ext': 'mp4', - 'title': 'Losing Iraq', + 'title': 'FRONTLINE - Losing Iraq', 'description': 'md5:f5bfbefadf421e8bb8647602011caf8e', 'duration': 5050, }, @@ -60,7 +60,7 @@ class PBSIE(InfoExtractor): 'info_dict': { 'id': '2201174722', 'ext': 'mp4', - 'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist', + 'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist', 'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28', 'duration': 801, }, @@ -72,7 +72,7 @@ class PBSIE(InfoExtractor): 'id': '2365297708', 'ext': 'mp4', 'description': 'md5:68d87ef760660eb564455eb30ca464fe', - 'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full', + 'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full', 'duration': 6559, 'thumbnail': 're:^https?://.*\.jpg$', }, @@ -88,7 +88,7 @@ class PBSIE(InfoExtractor): 'display_id': 'killer-typhoon', 'ext': 'mp4', 'description': 'md5:c741d14e979fc53228c575894094f157', - 'title': 'Killer Typhoon', + 'title': 'NOVA - Killer Typhoon', 'duration': 3172, 'thumbnail': 're:^https?://.*\.jpg$', 'upload_date': '20140122', @@ -110,7 +110,7 @@ class PBSIE(InfoExtractor): 'id': '2280706814', 'display_id': 'player', 'ext': 'mp4', - 'title': 'Death and the Civil War', + 'title': 'American Experience - Death and the Civil War', 'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.', 'duration': 6705, 'thumbnail': 're:^https?://.*\.jpg$', @@ -118,6 +118,21 @@ class PBSIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, + { + 'url': 'http://video.pbs.org/video/2365367186/', + 'info_dict': { + 'id': '2365367186', + 'display_id': '2365367186', + 'ext': 'mp4', + 'title': 'To Catch A Comet - Full Episode', + 'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.', + 'duration': 3342, + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, # requires ffmpeg + }, } ] @@ -232,6 +247,12 @@ class PBSIE(InfoExtractor): 'url': closed_captions_url, }] + # info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc) + # Try turning it to 'program - title' naming scheme if possible + alt_title = info.get('program', {}).get('title') + if alt_title: + info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title']) + return { 'id': video_id, 'display_id': display_id, diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py index cf495f310..6977afb27 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/snagfilms.py @@ -23,6 +23,15 @@ class SnagFilmsEmbedIE(InfoExtractor): 'ext': 'mp4', 'title': '#whilewewatch', } + }, { + # invalid labels, 360p is better that 480p + 'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036', + 'md5': '882fca19b9eb27ef865efeeaed376a48', + 'info_dict': { + 'id': '17ca0950-a74a-11e0-a92a-0026bb61d036', + 'ext': 'mp4', + 'title': 'Life in Limbo', + } }, { 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017', 'only_matching': True, @@ -52,14 +61,15 @@ class SnagFilmsEmbedIE(InfoExtractor): if not file_: continue type_ = source.get('type') - format_id = source.get('label') ext = determine_ext(file_) - if any(_ == 'm3u8' for _ in (type_, ext)): + format_id = source.get('label') or ext + if all(v == 'm3u8' for v in (type_, ext)): formats.extend(self._extract_m3u8_formats( file_, video_id, 'mp4', m3u8_id='hls')) else: bitrate = int_or_none(self._search_regex( - r'(\d+)kbps', file_, 'bitrate', default=None)) + [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext], + file_, 'bitrate', default=None)) height = int_or_none(self._search_regex( r'^(\d+)[pP]$', format_id, 'height', default=None)) formats.append({ diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 4a755c657..636607db5 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -11,13 +11,13 @@ class TagesschauIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/(ts|tsg|tt|nm|bab/bab)|video/video|tsvorzwanzig)(?P-?[0-9]+)(?:~[-_a-zA-Z0-9]*)?\.html' _TESTS = [{ - 'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html', - 'md5': 'bcdeac2194fb296d599ce7929dfa4009', + 'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html', + 'md5': '917a228bc7df7850783bc47979673a09', 'info_dict': { - 'id': '1399128', + 'id': '102143', 'ext': 'mp4', - 'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen', - 'description': 'md5:69da3c61275b426426d711bde96463ab', + 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt', + 'description': 'md5:171feccd9d9b3dd54d05d501568f6359', 'thumbnail': 're:^http:.*\.jpg$', }, }, { diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 9ead13a91..3d3b635e4 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -4,8 +4,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from .pornhub import PornHubIE -from .vimeo import VimeoIE class TumblrIE(InfoExtractor): @@ -60,26 +58,16 @@ class TumblrIE(InfoExtractor): blog = m_url.group('blog_name') url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id) - webpage = self._download_webpage(url, video_id) - - vid_me_embed_url = self._search_regex( - r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', - webpage, 'vid.me embed', default=None) - if vid_me_embed_url is not None: - return self.url_result(vid_me_embed_url, 'Vidme') - - pornhub_url = PornHubIE._extract_url(webpage) - if pornhub_url: - return self.url_result(pornhub_url, 'PornHub') - - vimeo_url = VimeoIE._extract_vimeo_url(url, webpage) - if vimeo_url: - return self.url_result(vimeo_url, 'Vimeo') + webpage, urlh = self._download_webpage_handle(url, video_id) iframe_url = self._search_regex( r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'', - webpage, 'iframe url') - iframe = self._download_webpage(iframe_url, video_id) + webpage, 'iframe url', default=None) + if iframe_url is None: + return self.url_result(urlh.geturl(), 'Generic') + + iframe = self._download_webpage(iframe_url, video_id, + 'Downloading iframe page') video_url = self._search_regex(r'[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE + _VALID_URL = r'%s/(?P[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE - _TEST = { + _TESTS = [{ 'url': 'http://www.twitch.tv/shroomztv', 'info_dict': { 'id': '12772022048', @@ -331,7 +331,10 @@ class TwitchStreamIE(TwitchBaseIE): # m3u8 download 'skip_download': True, }, - } + }, { + 'url': 'http://www.twitch.tv/miracle_doto#profile-0', + 'only_matching': True, + }] def _real_extract(self, url): channel_id = self._match_id(url) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index e2bab52fe..4a0eaf65f 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -77,7 +77,11 @@ class UdemyIE(InfoExtractor): login_popup = self._download_webpage( self._LOGIN_URL, None, 'Downloading login popup') - if login_popup == '
': + def is_logged(webpage): + return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<']) + + # already logged in + if is_logged(login_popup): return login_form = self._form_hidden_inputs('login-form', login_popup) @@ -95,8 +99,7 @@ class UdemyIE(InfoExtractor): response = self._download_webpage( request, None, 'Logging in as %s' % username) - if all(logout_pattern not in response - for logout_pattern in ['href="https://www.udemy.com/user/logout/', '>Logout<']): + if not is_logged(response): error = self._html_search_regex( r'(?s)]+class="form-errors[^"]*">(.+?)', response, 'error message', default=None) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 04e2b0ba7..01af7a995 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -1,5 +1,4 @@ from __future__ import unicode_literals -import re from .common import InfoExtractor from .ooyala import OoyalaIE @@ -7,25 +6,29 @@ from ..utils import ExtractorError class ViceIE(InfoExtractor): - _VALID_URL = r'http://www\.vice\.com/.*?/(?P.+)' + _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)+(?P.+)' - _TEST = { - 'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1', - 'info_dict': { - 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', - 'ext': 'mp4', - 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', - }, - 'params': { - # Requires ffmpeg (m3u8 manifest) - 'skip_download': True, - }, - } + _TESTS = [ + { + 'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1', + 'info_dict': { + 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', + 'ext': 'mp4', + 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', + }, + 'params': { + # Requires ffmpeg (m3u8 manifest) + 'skip_download': True, + }, + }, { + 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', + 'only_matching': True, + } + ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - name = mobj.group('name') - webpage = self._download_webpage(url, name) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) try: embed_code = self._search_regex( r'embedCode=([^&\'"]+)', webpage, diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index 1742e66f4..6ef36290b 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -1,129 +1,137 @@ +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_request +from ..compat import ( + compat_urllib_request, + compat_urllib_parse, +) +from ..utils import ( + determine_ext, + int_or_none, + parse_iso8601, +) class ViewsterIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P\d+-\d+-\d+)' + _VALID_URL = r'http://(?:www\.)?viewster\.com/(?:serie|movie)/(?P\d+-\d+-\d+)' _TESTS = [{ - # movielink, paymethod=fre - 'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/', - 'playlist': [{ - 'md5': '8f9d94b282d80c42b378dffdbb11caf3', - 'info_dict': { - 'id': '1293-19341-000-movie', - 'ext': 'flv', - 'title': "'Hout' (Wood) - Movie", - }, - }], - 'info_dict': { - 'id': '1293-19341-000', - 'title': "'Hout' (Wood)", - 'description': 'md5:925733185a9242ef96f436937683f33b', - } - }, { - # movielink, paymethod=adv + # movie, Type=Movie 'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/', - 'playlist': [{ - 'md5': '77a005453ca7396cbe3d35c9bea30aef', - 'info_dict': { - 'id': '1140-11855-000-movie', - 'ext': 'flv', - 'title': "THE LISTENING PROJECT - Movie", - }, - }], + 'md5': '14d3cfffe66d57b41ae2d9c873416f01', 'info_dict': { 'id': '1140-11855-000', - 'title': "THE LISTENING PROJECT", - 'description': 'md5:714421ae9957e112e672551094bf3b08', - } + 'ext': 'flv', + 'title': 'The listening Project', + 'description': 'md5:bac720244afd1a8ea279864e67baa071', + 'timestamp': 1214870400, + 'upload_date': '20080701', + 'duration': 4680, + }, }, { - # direct links, no movielink - 'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/', - 'playlist': [{ - 'md5': '0307b7eac6bfb21ab0577a71f6eebd8f', - 'info_dict': { - 'id': '1198-56411-000-trailer', - 'ext': 'mp4', - 'title': "Sinister - Trailer", - }, - }, { - 'md5': '80b9ee3ad69fb368f104cb5d9732ae95', - 'info_dict': { - 'id': '1198-56411-000-behind-scenes', - 'ext': 'mp4', - 'title': "Sinister - Behind Scenes", - }, - }, { - 'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5', - 'info_dict': { - 'id': '1198-56411-000-scene-from-movie', - 'ext': 'mp4', - 'title': "Sinister - Scene from movie", - }, - }], + # series episode, Type=Episode + 'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/', + 'md5': 'd5434c80fcfdb61651cc2199a88d6ba3', 'info_dict': { - 'id': '1198-56411-000', - 'title': "Sinister", - 'description': 'md5:014c40b0488848de9683566a42e33372', - } + 'id': '1284-19427-001', + 'ext': 'flv', + 'title': 'The World and a Wall', + 'description': 'md5:24814cf74d3453fdf5bfef9716d073e3', + 'timestamp': 1428192000, + 'upload_date': '20150405', + 'duration': 1500, + }, + }, { + # serie, Type=Serie + 'url': 'http://www.viewster.com/serie/1303-19426-000/', + 'info_dict': { + 'id': '1303-19426-000', + 'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?', + 'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11', + }, + 'playlist_count': 13, + }, { + # unfinished serie, no Type + 'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/', + 'info_dict': { + 'id': '1284-19427-000', + 'title': 'Baby Steps—Season 2', + 'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1', + }, + 'playlist_mincount': 16, }] _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01' + _AUTH_TOKEN = '/YqhSYsx8EaU9Bsta3ojlA==' - def _real_extract(self, url): - video_id = self._match_id(url) - - request = compat_urllib_request.Request( - 'http://api.live.viewster.com/api/v1/movie/%s' % video_id) + def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True): + request = compat_urllib_request.Request(url) request.add_header('Accept', self._ACCEPT_HEADER) + request.add_header('Auth-token', self._AUTH_TOKEN) + return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal) - movie = self._download_json( - request, video_id, 'Downloading movie metadata JSON') - - title = movie.get('title') or movie['original_title'] - description = movie.get('synopsis') - thumbnail = movie.get('large_artwork') or movie.get('artwork') - - entries = [] - for clip in movie['play_list']: - entry = None - - # movielink api - link_request = clip.get('link_request') - if link_request: - request = compat_urllib_request.Request( - 'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s¤cy=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s' - % link_request) - request.add_header('Accept', self._ACCEPT_HEADER) + def _real_extract(self, url): + video_id = self._match_id(url) - movie_link = self._download_json( - request, video_id, 'Downloading movie link JSON', fatal=False) + info = self._download_json( + 'https://public-api.viewster.com/search/%s' % video_id, + video_id, 'Downloading entry JSON') - if movie_link: - formats = self._extract_f4m_formats( - movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id) - self._sort_formats(formats) - entry = { - 'formats': formats, - } + entry_id = info.get('Id') or info['id'] - # direct link - clip_url = clip.get('clip_data', {}).get('url') - if clip_url: - entry = { - 'url': clip_url, - 'ext': 'mp4', - } + # unfinished serie has no Type + if info.get('Type') in ['Serie', None]: + episodes = self._download_json( + 'https://public-api.viewster.com/series/%s/episodes' % entry_id, + video_id, 'Downloading series JSON') + entries = [ + self.url_result( + 'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster') + for episode in episodes] + title = (info.get('Title') or info['Synopsis']['Title']).strip() + description = info.get('Synopsis', {}).get('Detailed') + return self.playlist_result(entries, video_id, title, description) - if entry: - entry.update({ - 'id': '%s-%s' % (video_id, clip['canonical_title']), - 'title': '%s - %s' % (title, clip['title']), + formats = [] + for media_type in ('application/f4m+xml', 'application/x-mpegURL'): + media = self._download_json( + 'https://public-api.viewster.com/movies/%s/video?mediaType=%s' + % (entry_id, compat_urllib_parse.quote(media_type)), + video_id, 'Downloading %s JSON' % media_type, fatal=False) + if not media: + continue + video_url = media.get('Uri') + if not video_url: + continue + ext = determine_ext(video_url) + if ext == 'f4m': + video_url += '&' if '?' in video_url else '?' + video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1' + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id='hds')) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id='hls', + fatal=False # m3u8 sometimes fail + )) + else: + formats.append({ + 'url': video_url, }) - entries.append(entry) + self._sort_formats(formats) - playlist = self.playlist_result(entries, video_id, title, description) - playlist['thumbnail'] = thumbnail - return playlist + synopsis = info.get('Synopsis', {}) + # Prefer title outside synopsis since it's less messy + title = (info.get('Title') or synopsis['Title']).strip() + description = synopsis.get('Detailed') or info.get('Synopsis', {}).get('Short') + duration = int_or_none(info.get('Duration')) + timestamp = parse_iso8601(info.get('ReleaseDate')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e7f5c7861..3d8b31f98 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -535,7 +535,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'dorappi2000', 'formats': 'mincount:33', }, - } + }, + # DASH manifest with segment_list + { + 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8', + 'md5': '8ce563a1d667b599d21064e982ab9e31', + 'info_dict': { + 'id': 'CsmdDsKjzN8', + 'ext': 'mp4', + 'upload_date': '20150501', # According to '', video_webpage) is not None: age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} @@ -922,6 +948,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Convert to the same format returned by compat_parse_qs video_info = dict((k, [v]) for k, v in args.items()) add_dash_mpd(video_info) + if args.get('livestream') == '1' or args.get('live_playback') == 1: + is_live = True if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): # We also try looking in get_video_info since it may contain different dashmpd # URL that points to a DASH manifest with possibly different itag set (some itags @@ -1226,6 +1254,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'dislike_count': dislike_count, 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]), 'formats': formats, + 'is_live': is_live, } diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 85365d769..9016e3498 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -219,7 +219,7 @@ def parseOpts(overrideArguments=None): selection.add_option( '--playlist-items', dest='playlist_items', metavar='ITEM_SPEC', default=None, - help='Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.') + help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.') selection.add_option( '--match-title', dest='matchtitle', metavar='REGEX', diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 942f76d24..ae813099d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1309,10 +1309,10 @@ def parse_duration(s): m = re.match( r'''(?ix)(?:P?T)? (?: - (?P[0-9.]+)\s*(?:mins?|minutes?)\s*| + (?P[0-9.]+)\s*(?:mins?\.?|minutes?)\s*| (?P[0-9.]+)\s*(?:hours?)| - \s*(?P[0-9]+)\s*(?:[:h]|hours?)\s*(?P[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*| + \s*(?P[0-9]+)\s*(?:[:h]|hours?)\s*(?P[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*| (?: (?: (?:(?P[0-9]+)\s*(?:[:d]|days?)\s*)? diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3ad7a2bc0..280afdd7f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.07.18' +__version__ = '2015.07.21'