From: Philipp Hagemeister Date: Sat, 13 Dec 2014 22:05:41 +0000 (+0100) Subject: Merge remote-tracking branch 'fstirlitz/master' X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=d5524947b560c1d0e1dfa2ef7f1969efe07866fa;hp=7668a2c5cba305bf7dc41b14149fa7f55d4dffd6;p=youtube-dl Merge remote-tracking branch 'fstirlitz/master' --- diff --git a/AUTHORS b/AUTHORS index bfa00f91b..5d45e02fc 100644 --- a/AUTHORS +++ b/AUTHORS @@ -92,3 +92,4 @@ Tithen-Firion Zack Fernandes cryptonaut Adrian Kretz +Mathias Rav diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py index d3cba869b..19813e034 100644 --- a/test/test_unicode_literals.py +++ b/test/test_unicode_literals.py @@ -20,7 +20,7 @@ IGNORED_FILES = [ ] -from helper import assertRegexpMatches +from test.helper import assertRegexpMatches class TestUnicodeLiterals(unittest.TestCase): diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 2a566eabe..8181bca09 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -5,8 +5,8 @@ import re import sys import time +from ..compat import compat_str from ..utils import ( - compat_str, encodeFilename, format_bytes, timeconvert, diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 7cd22c504..ef3e0d5f4 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -9,10 +9,12 @@ import xml.etree.ElementTree as etree from .common import FileDownloader from .http import HttpFD +from ..compat import ( + compat_urlparse, +) from ..utils import ( struct_pack, struct_unpack, - compat_urlparse, format_bytes, encodeFilename, sanitize_open, @@ -231,6 +233,7 @@ class F4mFD(FileDownloader): 'continuedl': True, 'quiet': True, 'noprogress': True, + 'ratelimit': self.params.get('ratelimit', None), 'test': self.params.get('test', False), } ) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index ad26cfa40..5bb0f3cfd 100644 --- 
a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -6,9 +6,11 @@ import subprocess from ..postprocessor.ffmpeg import FFmpegPostProcessor from .common import FileDownloader -from ..utils import ( +from ..compat import ( compat_urlparse, compat_urllib_request, +) +from ..utils import ( check_executable, encodeFilename, ) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 224962e86..e68f20c9f 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -4,11 +4,12 @@ import os import time from .common import FileDownloader -from ..utils import ( +from ..compat import ( compat_urllib_request, compat_urllib_error, +) +from ..utils import ( ContentTooShortError, - encodeFilename, sanitize_open, format_bytes, diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 58ae2005c..575912675 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -7,9 +7,9 @@ import sys import time from .common import FileDownloader +from ..compat import compat_str from ..utils import ( check_executable, - compat_str, encodeFilename, format_bytes, get_exe_version, diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 119ec2044..3ae7a8a52 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -316,6 +316,7 @@ from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE +from .restudy import RestudyIE from .reverbnation import ReverbNationIE from .ringtv import RingTVIE from .ro220 import Ro220IE diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 39e4ca296..502a9c25a 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -7,6 +7,8 @@ import json from .common import InfoExtractor from ..utils import ( ExtractorError, + xpath_text, + float_or_none, ) @@ -128,7 +130,8 @@ class 
AdultSwimIE(InfoExtractor): segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information') - segment_duration = idoc.find('.//trt').text.strip() + segment_duration = float_or_none( + xpath_text(idoc, './/trt', 'segment duration').strip()) formats = [] file_els = idoc.findall('.//files/file') diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 398e93bfb..623aeaf34 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -5,10 +5,9 @@ import re import json from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - compat_str, qualities, - determine_ext, ) @@ -75,9 +74,7 @@ class AllocineIE(InfoExtractor): 'format_id': format_id, 'quality': quality(format_id), 'url': v, - 'ext': determine_ext(v), }) - self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index 47f8e4157..b51eafc45 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from .fivemin import FiveMinIE class AolIE(InfoExtractor): @@ -42,31 +41,30 @@ class AolIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - playlist_id = mobj.group('playlist_id') - if playlist_id and not self._downloader.params.get('noplaylist'): - self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) + if not playlist_id or self._downloader.params.get('noplaylist'): + return self.url_result('5min:%s' % video_id) - webpage = self._download_webpage(url, playlist_id) - title = self._html_search_regex( - r'

(.+?)

', webpage, 'title') - playlist_html = self._search_regex( - r"(?s)(.*?)", webpage, - 'playlist HTML') - entries = [{ - '_type': 'url', - 'url': 'aol-video:%s' % m.group('id'), - 'ie_key': 'Aol', - } for m in re.finditer( - r"[0-9]+)'\s+class='video-thumb'>", - playlist_html)] + self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) - return { - '_type': 'playlist', - 'id': playlist_id, - 'display_id': mobj.group('playlist_display_id'), - 'title': title, - 'entries': entries, - } + webpage = self._download_webpage(url, playlist_id) + title = self._html_search_regex( + r'

(.+?)

', webpage, 'title') + playlist_html = self._search_regex( + r"(?s)(.*?)", webpage, + 'playlist HTML') + entries = [{ + '_type': 'url', + 'url': 'aol-video:%s' % m.group('id'), + 'ie_key': 'Aol', + } for m in re.finditer( + r"[0-9]+)'\s+class='video-thumb'>", + playlist_html)] - return FiveMinIE._build_result(video_id) + return { + '_type': 'playlist', + 'id': playlist_id, + 'display_id': mobj.group('playlist_display_id'), + 'title': title, + 'entries': entries, + } diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 0c01fa1a1..7cd0482c7 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -4,8 +4,8 @@ import re import json from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( - compat_urlparse, int_or_none, ) diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 1c765532a..014a21952 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -3,8 +3,8 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urllib_parse from ..utils import ( - compat_urllib_parse, determine_ext, ExtractorError, ) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 1ca0b7cf2..98e1443ab 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -5,7 +5,7 @@ import json import itertools from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, ) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index acddbc8f1..bcb2821cd 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -4,9 +4,11 @@ import json import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urlparse, +) +from ..utils import ( ExtractorError, ) @@ 
-104,7 +106,7 @@ class BandcampIE(InfoExtractor): class BandcampAlbumIE(InfoExtractor): IE_NAME = 'Bandcamp:album' - _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com(?:/album/(?P[^?#]+))' + _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))?' _TESTS = [{ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', @@ -139,6 +141,12 @@ class BandcampAlbumIE(InfoExtractor): 'title': 'Hierophany of the Open Grave', }, 'playlist_mincount': 9, + }, { + 'url': 'http://dotscale.bandcamp.com', + 'info_dict': { + 'title': 'Loom', + }, + 'playlist_mincount': 7, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index 01c02d360..2d2f742ae 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -209,7 +209,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor): webpage = self._download_webpage(url, group_id, 'Downloading video page') programme_id = self._search_regex( - r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False) + r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None) if programme_id: player = self._download_json( 'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id, diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index c1fc433f7..003e50002 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -1,8 +1,8 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urllib_parse from ..utils import ( - compat_urllib_parse, xpath_text, xpath_with_ns, int_or_none, diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 0d5889f5d..241b904a9 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -4,8 +4,8 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_parse_qs from ..utils import ( - 
compat_parse_qs, ExtractorError, int_or_none, unified_strdate, @@ -29,10 +29,9 @@ class BiliBiliIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + video_code = self._search_regex( r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code') diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index bf18a97e0..1eca00470 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -6,20 +6,21 @@ import json import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse, - find_xpath_attr, - fix_xml_ampersands, - compat_urlparse, - compat_str, - compat_urllib_request, +from ..compat import ( compat_parse_qs, + compat_str, + compat_urllib_parse, compat_urllib_parse_urlparse, - + compat_urllib_request, + compat_urlparse, +) +from ..utils import ( determine_ext, ExtractorError, - unsmuggle_url, + find_xpath_attr, + fix_xml_ampersands, unescapeHTML, + unsmuggle_url, ) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 97feb6704..2f866f3ef 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -4,10 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, compat_urllib_parse, compat_urllib_parse_urlparse, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py index b62be56da..9c25b2223 100644 --- a/youtube_dl/extractor/comcarcoff.py +++ b/youtube_dl/extractor/comcarcoff.py @@ -1,4 +1,6 @@ # encoding: utf-8 +from __future__ import unicode_literals + import json from .common import InfoExtractor diff --git a/youtube_dl/extractor/comedycentral.py 
b/youtube_dl/extractor/comedycentral.py index 2e3ef3fda..48e2410b6 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .mtv import MTVServicesInfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urllib_parse, +) +from ..utils import ( ExtractorError, float_or_none, unified_strdate, diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 7a7e79360..3db4db4e4 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -5,12 +5,14 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, - orderedSet, compat_urllib_parse_urlparse, compat_urlparse, ) +from ..utils import ( + orderedSet, +) class CondeNastIE(InfoExtractor): diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index d7e2b841e..8f1ea02e7 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -10,10 +10,12 @@ import xml.etree.ElementTree from hashlib import sha1 from math import pow, sqrt, floor from .subtitles import SubtitlesInfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( + ExtractorError, bytes_to_intlist, intlist_to_bytes, unified_strdate, diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 936c13cd6..cf5841a7c 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -8,13 +8,15 @@ import itertools from .common import InfoExtractor from .subtitles import SubtitlesInfoExtractor -from ..utils import ( - compat_urllib_request, +from ..compat import ( compat_str, + compat_urllib_request, +) +from ..utils import ( + ExtractorError, + int_or_none, orderedSet, str_to_int, - int_or_none, - 
ExtractorError, unescapeHTML, ) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 45d66e2e6..c6b813f58 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -5,7 +5,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) diff --git a/youtube_dl/extractor/ehow.py b/youtube_dl/extractor/ehow.py index b766e17f2..9cb1bf301 100644 --- a/youtube_dl/extractor/ehow.py +++ b/youtube_dl/extractor/ehow.py @@ -1,8 +1,6 @@ from __future__ import unicode_literals -import re - -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) from .common import InfoExtractor @@ -24,11 +22,10 @@ class EHowIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)', - webpage, 'video URL') + video_url = self._search_regex( + r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL') final_url = compat_urllib_parse.unquote(video_url) uploader = self._html_search_meta('uploader', webpage) title = self._og_search_title(webpage).replace(' | eHow', '') diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index f4c1e2a72..a30a1f330 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -6,7 +6,7 @@ import random import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, ) diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py index 92ada81d2..4ea37ebd9 100644 --- a/youtube_dl/extractor/engadget.py +++ b/youtube_dl/extractor/engadget.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from .fivemin import FiveMinIE from ..utils import 
( url_basename, ) @@ -27,11 +26,10 @@ class EngadgetIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) if video_id is not None: - return FiveMinIE._build_result(video_id) + return self.url_result('5min:%s' % video_id) else: title = url_basename(url) webpage = self._download_webpage(url, title) @@ -39,5 +37,5 @@ class EngadgetIE(InfoExtractor): return { '_type': 'playlist', 'title': title, - 'entries': [FiveMinIE._build_result(id) for id in ids] + 'entries': [self.url_result('5min:%s' % vid) for vid in ids] } diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 476fc22b9..e240cb859 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -3,9 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, - +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/everyonesmixtape.py b/youtube_dl/extractor/everyonesmixtape.py index d237a8281..d872d828f 100644 --- a/youtube_dl/extractor/everyonesmixtape.py +++ b/youtube_dl/extractor/everyonesmixtape.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index aacbf1414..36ba33128 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -3,16 +3,18 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urllib_parse, +) +from ..utils import ( str_to_int, ) class ExtremeTubeIE(InfoExtractor): - _VALID_URL = 
r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)' + _VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)' _TESTS = [{ 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', @@ -31,7 +33,7 @@ class ExtremeTubeIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = mobj.group('id') url = 'http://www.' + mobj.group('url') req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 6f5d23559..81ceace53 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -1,19 +1,20 @@ #! -*- coding: utf-8 -*- from __future__ import unicode_literals -import re import hashlib from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_request, compat_urlparse, ) +from ..utils import ( + ExtractorError, +) class FC2IE(InfoExtractor): - _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)' + _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)' IE_NAME = 'fc2' _TEST = { 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', @@ -26,9 +27,7 @@ class FC2IE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) self._downloader.cookiejar.clear_session_cookies() # must clear diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py index af439ccfe..3191116d9 100644 --- a/youtube_dl/extractor/firedrive.py +++ b/youtube_dl/extractor/firedrive.py @@ -4,11 +4,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - 
ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) +from ..utils import ( + ExtractorError, +) class FiredriveIE(InfoExtractor): @@ -28,11 +30,8 @@ class FiredriveIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) url = 'http://firedrive.com/file/%s' % video_id - webpage = self._download_webpage(url, video_id) if re.search(self._FILE_DELETED_REGEX, webpage) is not None: diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py index f9c127ce6..5b24b921c 100644 --- a/youtube_dl/extractor/fivemin.py +++ b/youtube_dl/extractor/fivemin.py @@ -1,11 +1,11 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urllib_parse, +) +from ..utils import ( ExtractorError, ) @@ -13,7 +13,7 @@ from ..utils import ( class FiveMinIE(InfoExtractor): IE_NAME = '5min' _VALID_URL = r'''(?x) - (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(.*?&)?playList=| + (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=| 5min:) (?P<id>\d+) ''' @@ -41,13 +41,8 @@ class FiveMinIE(InfoExtractor): }, ] - @classmethod - def _build_result(cls, video_id): - return cls.url_result('5min:%s' % video_id, cls.ie_key()) - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id embed_page = self._download_webpage(embed_url, video_id, 'Downloading embed page') diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index b22ce2acb..7187e0752 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -3,12 +3,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from 
..compat import ( compat_urllib_request, - unified_strdate, - str_to_int, - parse_duration, +) +from ..utils import ( clean_html, + parse_duration, + str_to_int, + unified_strdate, ) @@ -31,9 +33,7 @@ class FourTubeIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') + video_id = self._match_id(url) webpage_url = 'http://www.4tube.com/videos/' + video_id webpage = self._download_webpage(webpage_url, video_id) diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index 898e0dda7..0c2972162 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -5,7 +5,7 @@ import json import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_parse_qs, compat_urlparse, ) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index e0420a48f..bbc760a49 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -6,13 +6,15 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_urllib_parse_urlparse, compat_urlparse, - ExtractorError, +) +from ..utils import ( clean_html, - parse_duration, - compat_urllib_parse_urlparse, + ExtractorError, int_or_none, + parse_duration, ) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index d570e3f6a..47373e215 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -4,9 +4,11 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urlparse, +) +from ..utils import ( unescapeHTML, ) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index de14ae1fb..d453ec010 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,7 +3,7 @@ from __future__ import 
unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) diff --git a/youtube_dl/extractor/golem.py b/youtube_dl/extractor/golem.py index 53714f47f..2bfb99040 100644 --- a/youtube_dl/extractor/golem.py +++ b/youtube_dl/extractor/golem.py @@ -2,8 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, +) +from ..utils import ( determine_ext, ) diff --git a/youtube_dl/extractor/googlesearch.py b/youtube_dl/extractor/googlesearch.py index 469e1f935..498304cb2 100644 --- a/youtube_dl/extractor/googlesearch.py +++ b/youtube_dl/extractor/googlesearch.py @@ -4,7 +4,7 @@ import itertools import re from .common import SearchInfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index 1ac1da856..ae24aff84 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -4,11 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - determine_ext, +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( + ExtractorError, int_or_none, ) @@ -106,7 +107,6 @@ class GorillaVidIE(InfoExtractor): formats = [{ 'format_id': 'sd', 'url': video_url, - 'ext': determine_ext(video_url), 'quality': 1, }] diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py index 8e812b669..704d0285d 100644 --- a/youtube_dl/extractor/hostingbulk.py +++ b/youtube_dl/extractor/hostingbulk.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_request, +) from ..utils import ( ExtractorError, - compat_urllib_request, int_or_none, urlencode_postdata, ) @@ 
-30,9 +32,7 @@ class HostingBulkIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) url = 'http://hostingbulk.com/{0:}.html'.format(video_id) # Custom request with cookie to set language to English, so our file diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index 6d0d847c6..aa0724a02 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -1,20 +1,20 @@ from __future__ import unicode_literals import json -import re import time from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, - +) +from ..utils import ( ExtractorError, ) class HypemIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' + _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/' _TEST = { 'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', 'md5': 'b9cc91b5af8995e9f0c1cee04c575828', @@ -27,8 +27,7 @@ class HypemIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - track_id = mobj.group(1) + track_id = self._match_id(url) data = {'ax': 1, 'ts': time.time()} data_encoded = compat_urllib_parse.urlencode(data) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index f2c1c10f5..13a53a0cb 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -4,7 +4,7 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, ) diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index e76dd222d..f25f43664 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals import base64 -import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) @@ -24,9 +23,7 @@ 
class InfoQIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_title = self._html_search_regex(r'<title>(.*?)', webpage, 'title') diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 1e4799187..c813d4b82 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, compat_urllib_parse, +) +from ..utils import ( xpath_with_ns, ) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 4247d6391..8529bedfc 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -6,8 +6,10 @@ from random import random from math import floor from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index f0fba1adb..7a400323d 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -5,8 +5,10 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py index 5d679e88d..c0956ba09 100644 --- a/youtube_dl/extractor/keek.py +++ b/youtube_dl/extractor/keek.py @@ -1,34 +1,39 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor class KeekIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P\w+)' + _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P\w+)' IE_NAME = 'keek' _TEST = { 
'url': 'https://www.keek.com/ytdl/keeks/NODfbab', - 'file': 'NODfbab.mp4', - 'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83', + 'md5': '09c5c109067536c1cec8bac8c21fea05', 'info_dict': { - 'uploader': 'ytdl', + 'id': 'NODfbab', + 'ext': 'mp4', + 'uploader': 'youtube-dl project', + 'uploader_id': 'ytdl', 'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .', }, } def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('videoID') + video_id = self._match_id(url) video_url = 'http://cdn.keek.com/keek/video/%s' % video_id thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id webpage = self._download_webpage(url, video_id) - uploader = self._html_search_regex( - r'
[\S\s]+?

(?P.+?)

', - webpage, 'uploader', fatal=False) + raw_desc = self._html_search_meta('description', webpage) + if raw_desc: + uploader = self._html_search_regex( + r'Watch (.*?)\s+\(', raw_desc, 'uploader', fatal=False) + uploader_id = self._html_search_regex( + r'Watch .*?\(@(.+?)\)', raw_desc, 'uploader_id', fatal=False) + else: + uploader = None + uploader_id = None return { 'id': video_id, @@ -36,5 +41,6 @@ class KeekIE(InfoExtractor): 'ext': 'mp4', 'title': self._og_search_title(webpage), 'thumbnail': thumbnail, - 'uploader': uploader + 'uploader': uploader, + 'uploader_id': uploader_id, } diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 75b63cffb..97dcb518a 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -4,7 +4,7 @@ import os import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urllib_parse, @@ -15,7 +15,7 @@ from ..aes import ( class KeezMoviesIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P[0-9]+)(?:[/?&]|$)' + _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P[0-9]+)(?:[/?&]|$)' _TEST = { 'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', 'file': '1214711.mp4', @@ -27,8 +27,7 @@ class KeezMoviesIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = self._match_id(url) req = compat_urllib_request.Request(url) req.add_header('Cookie', 'age_verified=1') diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 03c4691c6..5247c6f58 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -4,10 +4,12 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, compat_urllib_parse_urlparse, 
compat_urlparse, +) +from ..utils import ( ExtractorError, find_xpath_attr, int_or_none, diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 2160d6cb0..26e84970d 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -5,12 +5,14 @@ import json from .subtitles import SubtitlesInfoExtractor from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_str, compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, int_or_none, - compat_str, ) diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py index 1abf6e4f8..0b85a59d1 100644 --- a/youtube_dl/extractor/malemotion.py +++ b/youtube_dl/extractor/malemotion.py @@ -1,43 +1,33 @@ +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) class MalemotionIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P.+?)(#|$)' + _VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P.+?)(#|$)' _TEST = { - 'url': 'http://malemotion.com/video/bien-dur.10ew', - 'file': '10ew.mp4', - 'md5': 'b3cc49f953b107e4a363cdff07d100ce', + 'url': 'http://malemotion.com/video/bete-de-concours.ltc', + 'md5': '3013e53a0afbde2878bc39998c33e8a5', 'info_dict': { - "title": "Bien dur", - "age_limit": 18, + 'id': 'ltc', + 'ext': 'mp4', + 'title': 'Bête de Concours', + 'age_limit': 18, }, - 'skip': 'This video has been deleted.' 
} def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group("id") - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - - # Extract video URL - video_url = compat_urllib_parse.unquote( - self._search_regex(r'(.*?)[^/]+)/' + _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P[^/]+)/' _TEST = { 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', @@ -31,12 +32,10 @@ class MiTeleIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - episode = mobj.group('episode') + episode = self._match_id(url) webpage = self._download_webpage(url, episode) embed_data_json = self._search_regex( - r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', - flags=re.DOTALL + r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data', ).replace('\'', '"') embed_data = json.loads(embed_data_json) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 55cc33a3e..07d194562 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( ExtractorError, HEADRequest, int_or_none, diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index 2ff79b9b8..184f9c2c9 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -5,10 +5,12 @@ import json import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( + ExtractorError, int_or_none, ) diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py index d658647e6..2cec12d35 100644 --- a/youtube_dl/extractor/mofosex.py +++ 
b/youtube_dl/extractor/mofosex.py @@ -4,7 +4,7 @@ import os import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urllib_parse, @@ -12,7 +12,7 @@ from ..utils import ( class MofosexIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?(?Pmofosex\.com/videos/(?P[0-9]+)/.*?\.html)' + _VALID_URL = r'https?://(?:www\.)?(?Pmofosex\.com/videos/(?P[0-9]+)/.*?\.html)' _TEST = { 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', 'md5': '1b2eb47ac33cc75d4a80e3026b613c5a', @@ -26,7 +26,7 @@ class MofosexIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = mobj.group('id') url = 'http://www.' + mobj.group('url') req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index 1c4f589cc..5de719bdc 100644 --- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -5,7 +5,7 @@ import os.path import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) @@ -37,10 +37,9 @@ class MonikerIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) orig_webpage = self._download_webpage(url, video_id) + fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) data = dict(fields) diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index 34a4bec3a..9f2853fa3 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -4,11 +4,13 @@ import re import time from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_request, compat_urllib_parse, ) +from ..utils import ( + ExtractorError, +) class 
MooshareIE(InfoExtractor): @@ -43,9 +45,7 @@ class MooshareIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) page = self._download_webpage(url, video_id, 'Downloading page') if re.search(r'>Video Not Found or Deleted<', page) is not None: diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index 7c0ec6a12..f5ca74e97 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -3,13 +3,14 @@ from __future__ import unicode_literals import hashlib import json -import re import time from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_parse_qs, compat_str, +) +from ..utils import ( int_or_none, ) @@ -32,10 +33,9 @@ class MotorsportIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') - + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) + flashvars_code = self._html_search_regex( r'[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P[\da-z_-]+)' _TEST = { 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', @@ -32,10 +30,10 @@ class NFBIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page') + video_id = self._match_id(url) + page = self._download_webpage( + 'https://www.nfb.ca/film/%s' % video_id, video_id, + 'Downloading film page') uploader_id = self._html_search_regex(r'[0-9]+)' + _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P[0-9]+)' _TEST = { 'url': 'http://m.nuvid.com/video/1310741/', 'md5': 'eab207b7ac4fccfb4e23c86201f11277', @@ -26,8 +28,7 @@ class NuvidIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, 
url) - video_id = mobj.group('id') + video_id = self._match_id(url) formats = [] diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 572a234ad..a6e722bf5 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -20,21 +20,23 @@ class ORFTVthekIE(InfoExtractor): _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P\d+)' _TEST = { - 'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747', - 'file': '7319747.mp4', - 'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375', - 'info_dict': { - 'title': 'Was Sie schon immer über Klassik wissen wollten', - 'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4', - 'duration': 3508, - 'upload_date': '20140105', - }, + 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389', + 'playlist': [{ + 'md5': '2942210346ed779588f428a92db88712', + 'info_dict': { + 'id': '8896777', + 'ext': 'mp4', + 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde', + 'description': 'md5:c1272f0245537812d4e36419c207b67d', + 'duration': 2668, + 'upload_date': '20141208', + }, + }], 'skip': 'Blocked outside of Austria', } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') + playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) data_json = self._search_regex( @@ -120,9 +122,7 @@ class ORFOE1IE(InfoExtractor): _VALID_URL = r'http://oe1\.orf\.at/programm/(?P[0-9]+)' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - show_id = mobj.group('id') - + show_id = self._match_id(url) data = self._download_json( 'http://oe1.orf.at/programm/%s/konsole' % show_id, show_id diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py index 
b4389e0b6..c66db3cdc 100644 --- a/youtube_dl/extractor/photobucket.py +++ b/youtube_dl/extractor/photobucket.py @@ -4,16 +4,17 @@ import json import re from .common import InfoExtractor -from ..utils import compat_urllib_parse +from ..compat import compat_urllib_parse class PhotobucketIE(InfoExtractor): _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P.*)\.(?P(flv)|(mp4))' _TEST = { 'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', - 'file': 'zpsc0c3b9fa.mp4', 'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', 'info_dict': { + 'id': 'zpsc0c3b9fa', + 'ext': 'mp4', 'timestamp': 1367669341, 'upload_date': '20130504', 'uploader': 'rachaneronas', diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index 17880471d..449d4836c 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -5,11 +5,13 @@ import re import os.path from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) +from ..utils import ( + ExtractorError, +) class PlayedIE(InfoExtractor): @@ -28,7 +30,6 @@ class PlayedIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - orig_webpage = self._download_webpage(url, video_id) m_error = re.search( diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py index ebc046804..9576aed0e 100644 --- a/youtube_dl/extractor/playfm.py +++ b/youtube_dl/extractor/playfm.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, float_or_none, int_or_none, diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index 
cd3905acb..c3e667e9e 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -3,31 +3,31 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse, +) from ..utils import ( - ExtractorError, clean_html, - compat_urllib_parse, + ExtractorError, ) class PlayvidIE(InfoExtractor): - _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P.+?)(?:#|$)' + _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P.+?)(?:#|$)' _TEST = { - 'url': 'http://www.playvid.com/watch/agbDDi7WZTV', - 'md5': '44930f8afa616efdf9482daf4fe53e1e', + 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', + 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', 'info_dict': { - 'id': 'agbDDi7WZTV', + 'id': 'RnmBNgtrrJu', 'ext': 'mp4', - 'title': 'Michelle Lewin in Miami Beach', - 'duration': 240, + 'title': 'md5:9256d01c6317e3f703848b5906880dc8', + 'duration': 82, 'age_limit': 18, } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) m_error = re.search( diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 2ca15b717..634142d0d 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -4,10 +4,12 @@ import os import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( + compat_urllib_parse, compat_urllib_parse_urlparse, compat_urllib_request, - compat_urllib_parse, +) +from ..utils import ( str_to_int, ) from ..aes import ( @@ -16,7 +18,7 @@ from ..aes import ( class PornHubIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P[0-9a-f]+)' + _VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P[0-9a-f]+)' _TEST = { 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'md5': 
'882f488fa1f0026f023f33576004a2ed', diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 7fcde086c..f536e6e6c 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -4,12 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - determine_ext, +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) +from ..utils import ( + determine_ext, + ExtractorError, +) class PromptFileIE(InfoExtractor): diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 1262793c8..385681d06 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -5,8 +5,10 @@ import re from hashlib import sha1 from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( unified_strdate, ) diff --git a/youtube_dl/extractor/quickvid.py b/youtube_dl/extractor/quickvid.py index 3bc78060d..af7d76cf4 100644 --- a/youtube_dl/extractor/quickvid.py +++ b/youtube_dl/extractor/quickvid.py @@ -3,8 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, +) +from ..utils import ( determine_ext, int_or_none, ) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 2d39ecfe4..aa26b7e0b 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -3,10 +3,12 @@ from __future__ import unicode_literals import re from .subtitles import SubtitlesInfoExtractor +from ..compat import ( + compat_urllib_parse, +) from ..utils import ( parse_duration, unified_strdate, - compat_urllib_parse, ) diff --git a/youtube_dl/extractor/restudy.py b/youtube_dl/extractor/restudy.py new file mode 100644 index 000000000..b17c2bfc0 --- /dev/null +++ b/youtube_dl/extractor/restudy.py @@ -0,0 +1,40 
@@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RestudyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?restudy\.dk/video/play/id/(?P[0-9]+)' + _TEST = { + 'url': 'https://www.restudy.dk/video/play/id/1637', + 'info_dict': { + 'id': '1637', + 'ext': 'flv', + 'title': 'Leiden-frosteffekt', + 'description': 'Denne video er et eksperiment med flydende kvælstof.', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage).strip() + description = self._og_search_description(webpage).strip() + + formats = self._extract_smil_formats( + 'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id, + video_id) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + } diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py index dc59a5e5c..5e84c1098 100644 --- a/youtube_dl/extractor/rts.py +++ b/youtube_dl/extractor/rts.py @@ -4,12 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_str, +) from ..utils import ( int_or_none, parse_duration, parse_iso8601, unescapeHTML, - compat_str, ) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 6941d96fb..b72b5a586 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -5,10 +5,12 @@ import re import itertools from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, - unified_strdate, +) +from ..utils import ( ExtractorError, + unified_strdate, ) @@ -36,9 +38,7 @@ class RutubeIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) video = self._download_json( 
'http://rutube.ru/api/video/%s/?format=json' % video_id, video_id, 'Downloading video JSON') @@ -114,8 +114,7 @@ class RutubeMovieIE(RutubeChannelIE): _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - movie_id = mobj.group('id') + movie_id = self._match_id(url) movie = self._download_json( self._MOVIE_TEMPLATE % movie_id, movie_id, 'Downloading movie JSON') diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index c145f6fc7..dfd897ba3 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_parse_qs, compat_urllib_request, ) +from ..utils import ( + ExtractorError, +) class ScreencastIE(InfoExtractor): @@ -57,8 +57,7 @@ class ScreencastIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_url = self._html_search_regex( diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index fdc31603a..26ced716e 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -4,10 +4,12 @@ import re import base64 from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse, + compat_urllib_request, +) from ..utils import ( ExtractorError, - compat_urllib_request, - compat_urllib_parse, int_or_none, ) @@ -26,26 +28,30 @@ class SharedIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - page = self._download_webpage(url, video_id) - - if re.search(r'>File does not exist<', page) is not None: - raise ExtractorError('Video %s does not 
exist' % video_id, expected=True) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - download_form = dict(re.findall(r'File does not exist<' in webpage: + raise ExtractorError( + 'Video %s does not exist' % video_id, expected=True) - request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form)) + download_form = dict(re.findall( + r'.+)' + _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P[0-9]+)' _TESTS = [{ 'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti', @@ -21,61 +17,63 @@ class StreamCZIE(InfoExtractor): 'id': '765767', 'ext': 'mp4', 'title': 'Peklo na talíři: Éčka pro děti', - 'description': 'md5:49ace0df986e95e331d0fe239d421519', - 'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100', + 'description': 'Taška s grónskou pomazánkou a další pekelnosti ZDE', + 'thumbnail': 're:^http://im.stream.cz/episode/52961d7e19d423f8f06f0100', 'duration': 256, }, }, { 'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka', - 'md5': '246272e753e26bbace7fcd9deca0650c', + 'md5': 'e54a254fb8b871968fd8403255f28589', 'info_dict': { 'id': '10002447', 'ext': 'mp4', 'title': 'Kancelář Blaník: Tři roky pro Mazánka', - 'description': 'md5:9177695a8b756a0a8ab160de4043b392', - 'thumbnail': 'http://im.stream.cz/episode/537f838c50c11f8d21320000', + 'description': 'md5:3862a00ba7bf0b3e44806b544032c859', + 'thumbnail': 're:^http://im.stream.cz/episode/537f838c50c11f8d21320000', 'duration': 368, }, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') - - webpage = self._download_webpage(url, video_id) - - data = self._html_search_regex(r'Stream\.Data\.Episode\((.+?)\);', webpage, 'stream data') - - jsonData = json.loads(data) + video_id = self._match_id(url) + data = self._download_json( + 'http://www.stream.cz/API/episode/%s' % video_id, video_id) formats = [] - for video in jsonData['instances']: - for video_format in 
video['instances']: - format_id = video_format['quality'] - - if format_id == '240p': - quality = 0 - elif format_id == '360p': - quality = 1 - elif format_id == '480p': - quality = 2 - elif format_id == '720p': - quality = 3 - + for quality, video in enumerate(data['video_qualities']): + for f in video['formats']: + typ = f['type'].partition('/')[2] + qlabel = video.get('quality_label') formats.append({ - 'format_id': '%s-%s' % (video_format['type'].split('/')[1], format_id), - 'url': video_format['source'], + 'format_note': '%s-%s' % (qlabel, typ) if qlabel else typ, + 'format_id': '%s-%s' % (typ, f['quality']), + 'url': f['source'], + 'height': int_or_none(f['quality'].rstrip('p')), 'quality': quality, }) - self._sort_formats(formats) + image = data.get('image') + if image: + thumbnail = self._proto_relative_url( + image.replace('{width}', '1240').replace('{height}', '697'), + scheme='http:', + ) + else: + thumbnail = None + + stream = data.get('_embedded', {}).get('stream:show', {}).get('name') + if stream: + title = '%s: %s' % (stream, data['name']) + else: + title = data['name'] + return { - 'id': compat_str(jsonData['episode_id']), - 'title': self._og_search_title(webpage), - 'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'), + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, 'formats': formats, - 'description': self._og_search_description(webpage), - 'duration': int_or_none(jsonData['duration']), - 'view_count': int_or_none(jsonData['stats_total']), + 'description': data.get('web_site_text'), + 'duration': int_or_none(data.get('duration')), + 'view_count': int_or_none(data.get('views')), } diff --git a/youtube_dl/extractor/tapely.py b/youtube_dl/extractor/tapely.py index 283e11350..f1f43d0a7 100644 --- a/youtube_dl/extractor/tapely.py +++ b/youtube_dl/extractor/tapely.py @@ -4,10 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_urllib_request, 
+) from ..utils import ( - ExtractorError, clean_html, - compat_urllib_request, + ExtractorError, float_or_none, parse_iso8601, ) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 72160503c..944177426 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -5,7 +5,7 @@ import re from .subtitles import SubtitlesInfoExtractor -from ..utils import ( +from ..compat import ( compat_str, ) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index e2653d62d..af6ef0033 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -4,8 +4,10 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_str, +) +from ..utils import ( determine_ext, ExtractorError, xpath_with_ns, diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index 66d159e99..9f9e388c5 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from .brightcove import BrightcoveIE from .discovery import DiscoveryIE -from ..utils import compat_urlparse +from ..compat import compat_urlparse class TlcIE(DiscoveryIE): diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 64a1e9030..d73ad3762 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -4,9 +4,11 @@ import json import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, +) +from ..utils import ( int_or_none, str_to_int, ) diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py index d516b6427..4de0aac52 100644 --- a/youtube_dl/extractor/tutv.py +++ b/youtube_dl/extractor/tutv.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals import base64 -import re from .common import InfoExtractor -from ..utils import compat_parse_qs +from 
..compat import compat_parse_qs class TutvIE(InfoExtractor): @@ -20,10 +19,9 @@ class TutvIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID') data_content = self._download_webpage( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 397d167e8..715f9930c 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -5,9 +5,11 @@ import itertools import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, parse_iso8601, ) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 5271611ac..4667ed83b 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/urort.py b/youtube_dl/extractor/urort.py index 5d06fcc9e..8872cfcb2 100644 --- a/youtube_dl/extractor/urort.py +++ b/youtube_dl/extractor/urort.py @@ -1,11 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( unified_strdate, ) @@ -18,11 +18,10 @@ class UrortIE(InfoExtractor): 'url': 'https://urort.p3.no/#!/Band/Gerilja', 'md5': '5ed31a924be8a05e47812678a86e127b', 'info_dict': { - 'id': '33124-4', + 'id': '33124-24', 'ext': 'mp3', 'title': 'The Bomb', 'thumbnail': 're:^https?://.+\.jpg', - 'like_count': int, 'uploader': 'Gerilja', 'uploader_id': 'Gerilja', 
'upload_date': '20100323', @@ -33,25 +32,31 @@ class UrortIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') + playlist_id = self._match_id(url) fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id) - json_url = 'http://urort.p3.no/breeze/urort/TrackDtos?$filter=' + fstr + json_url = 'http://urort.p3.no/breeze/urort/TrackDTOViews?$filter=%s&$orderby=Released%%20desc&$expand=Tags%%2CFiles' % fstr songs = self._download_json(json_url, playlist_id) - print(songs[0]) - - entries = [{ - 'id': '%d-%s' % (s['BandId'], s['$id']), - 'title': s['Title'], - 'url': s['TrackUrl'], - 'ext': 'mp3', - 'uploader_id': playlist_id, - 'uploader': s.get('BandName', playlist_id), - 'like_count': s.get('LikeCount'), - 'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'], - 'upload_date': unified_strdate(s.get('Released')), - } for s in songs] + entries = [] + for s in songs: + formats = [{ + 'tbr': f.get('Quality'), + 'ext': f['FileType'], + 'format_id': '%s-%s' % (f['FileType'], f.get('Quality', '')), + 'url': 'http://p3urort.blob.core.windows.net/tracks/%s' % f['FileRef'], + 'preference': 3 if f['FileType'] == 'mp3' else 2, + } for f in s['Files']] + self._sort_formats(formats) + e = { + 'id': '%d-%s' % (s['BandId'], s['$id']), + 'title': s['Title'], + 'uploader_id': playlist_id, + 'uploader': s.get('BandName', playlist_id), + 'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'], + 'upload_date': unified_strdate(s.get('Released')), + 'formats': formats, + } + entries.append(e) return { '_type': 'playlist', diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 53dc3a496..68d03b999 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, ) diff --git 
a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 455b6d9da..dd026748d 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -1,19 +1,18 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, - +) +from ..utils import ( ExtractorError, ) class Vbox7IE(InfoExtractor): - _VALID_URL = r'http://(www\.)?vbox7\.com/play:(?P[^/]+)' + _VALID_URL = r'http://(?:www\.)?vbox7\.com/play:(?P[^/]+)' _TEST = { 'url': 'http://vbox7.com/play:249bb972c2', 'md5': '99f65c0c9ef9b682b97313e052734c3f', @@ -25,8 +24,7 @@ class Vbox7IE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) redirect_page, urlh = self._download_webpage_handle(url, video_id) new_location = self._search_regex(r'window\.location = \'(.*)\';', diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py index 94647d1c8..815f58468 100644 --- a/youtube_dl/extractor/veehd.py +++ b/youtube_dl/extractor/veehd.py @@ -4,10 +4,12 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urlparse, - get_element_by_id, +) +from ..utils import ( clean_html, + get_element_by_id, ) @@ -26,8 +28,7 @@ class VeeHDIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) # VeeHD seems to send garbage on the first request. 
# See https://github.com/rg3/youtube-dl/issues/2102 diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index a7953a7e7..01e258e32 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -4,8 +4,10 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( int_or_none, ExtractorError, ) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c912c3cbe..43f6b029d 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -4,8 +4,10 @@ import re import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( ExtractorError, ) diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index ac6c25537..0ffc7ff7d 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -1,10 +1,8 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..compat import compat_urlparse from .internetvideoarchive import InternetVideoArchiveIE -from ..utils import compat_urlparse class VideoDetectiveIE(InfoExtractor): @@ -17,13 +15,12 @@ class VideoDetectiveIE(InfoExtractor): 'ext': 'mp4', 'title': 'KICK-ASS 2', 'description': 'md5:65ba37ad619165afac7d432eaded6013', - 'duration': 135, + 'duration': 138, }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage) query = compat_urlparse.urlparse(og_video).query diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py index 29c4e0101..7a78f0d26 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -1,11 +1,11 @@ # coding: 
utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, +) +from ..utils import ( remove_start, ) @@ -27,9 +27,7 @@ class VideoMegaIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id) webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index ca6b0d5b3..542e9198a 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -5,14 +5,17 @@ import re import json from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse, + compat_urllib_request, +) from ..utils import ( ExtractorError, - compat_urllib_request, - compat_urllib_parse, - compat_str, + orderedSet, unescapeHTML, unified_strdate, - orderedSet) +) class VKIE(InfoExtractor): diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index affef6507..1c0966a79 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -2,8 +2,9 @@ from __future__ import unicode_literals import re + from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urllib_request, ) @@ -24,8 +25,7 @@ class VodlockerIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) fields = dict(re.findall(r'''(?x)]*>(.*?)', webpage, 'title') diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 53ed7ef5a..79ed6c744 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -1,10 +1,8 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common 
import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, ) @@ -23,10 +21,7 @@ class XNXXIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - # Get webpage content + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_url = self._search_regex(r'flv_url=(.*?)&', diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 38448e7c0..f9d98b83f 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -4,15 +4,17 @@ import re import json from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_request, +) +from ..utils import ( parse_duration, str_to_int, ) class XTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?Pxtube\.com/watch\.php\?v=(?P[^/?&]+))' + _VALID_URL = r'https?://(?:www\.)?(?Pxtube\.com/watch\.php\?v=(?P[^/?&]+))' _TEST = { 'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_', 'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab', @@ -29,7 +31,7 @@ class XTubeIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = mobj.group('id') url = 'http://www.' 
+ mobj.group('url') req = compat_urllib_request.Request(url) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 7e0044824..2a45dc574 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -3,15 +3,17 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, - ExtractorError, +) +from ..utils import ( clean_html, + ExtractorError, ) class XVideosIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)' + _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P[0-9]+)(?:.*)' _TEST = { 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', 'md5': '4b46ae6ea5e6e9086e714d883313c0c9', @@ -24,37 +26,25 @@ class XVideosIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - mobj = re.search(r'

<h1 class="inlineError">(.+?)</h1>

', webpage) if mobj: raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) - # Extract video URL video_url = compat_urllib_parse.unquote( self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL')) - - # Extract title video_title = self._html_search_regex( r'(.*?)\s+-\s+XVID', webpage, 'title') - - # Extract video thumbnail video_thumbnail = self._search_regex( r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False) return { 'id': video_id, 'url': video_url, - 'uploader': None, - 'upload_date': None, 'title': video_title, 'ext': 'flv', 'thumbnail': video_thumbnail, - 'description': None, 'age_limit': 18, } diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 0fdb12243..031226f27 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -6,11 +6,13 @@ import json import re from .common import InfoExtractor, SearchInfoExtractor -from ..utils import ( - ExtractorError, +from ..compat import ( compat_urllib_parse, compat_urlparse, +) +from ..utils import ( clean_html, + ExtractorError, int_or_none, ) diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 7b621a9e3..894678a23 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -5,7 +5,7 @@ import re import json from .common import InfoExtractor -from ..utils import compat_urllib_parse +from ..compat import compat_urllib_parse class YnetIE(InfoExtractor): diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index d9c06a2ee..107c9ac36 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -6,10 +6,11 @@ import re import sys from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, - +) +from ..utils import ( ExtractorError, unescapeHTML, unified_strdate, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 
7b6179a2a..823d6aaf3 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1072,7 +1072,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) )""" _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' - _MORE_PAGES_INDICATOR = r'data-link-type="next"' _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)' IE_NAME = 'youtube:playlist' _TESTS = [{ @@ -1269,8 +1268,6 @@ class YoutubeTopListIE(YoutubePlaylistIE): class YoutubeChannelIE(InfoExtractor): IE_DESC = 'YouTube.com channels' _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' - _MORE_PAGES_INDICATOR = 'yt-uix-load-more' - _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' IE_NAME = 'youtube:channel' _TESTS = [{ 'note': 'paginated channel', @@ -1307,20 +1304,27 @@ class YoutubeChannelIE(InfoExtractor): return self.playlist_result(entries, channel_id) def _entries(): + more_widget_html = content_html = channel_page for pagenum in itertools.count(1): - url = self._MORE_PAGES_URL % (pagenum, channel_id) - page = self._download_json( - url, channel_id, note='Downloading page #%s' % pagenum, - transform_source=uppercase_escape) - ids_in_page = self.extract_videos_from_page(page['content_html']) + ids_in_page = self.extract_videos_from_page(content_html) for video_id in ids_in_page: yield self.url_result( video_id, 'Youtube', video_id=video_id) - if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: + mobj = re.search( + r'data-uix-load-more-href="/?(?P<more>[^"]+)"', + more_widget_html) + if not mobj: break + more = self._download_json( + 'https://youtube.com/%s' % mobj.group('more'), channel_id, + 'Downloading page #%s' % (pagenum + 1), + transform_source=uppercase_escape) + content_html = more['content_html'] + more_widget_html = 
more['load_more_widget_html'] + return self.playlist_result(_entries(), channel_id) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 2bd264b30..e60505ace 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -4,8 +4,8 @@ import collections import io import zlib +from .compat import compat_str from .utils import ( - compat_str, ExtractorError, struct_unpack, ) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 51a822e4f..ec34dcef9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -388,7 +388,13 @@ def formatSeconds(secs): def make_HTTPS_handler(opts_no_check_certificate, **kwargs): - if sys.version_info < (3, 2): + if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9 + context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) + context.options &= ~ssl.OP_NO_SSLv3 # Allow older, not-as-secure SSLv3 + if opts_no_check_certificate: + context.verify_mode = ssl.CERT_NONE + return compat_urllib_request.HTTPSHandler(context=context, **kwargs) + elif sys.version_info < (3, 2): import httplib class HTTPSConnectionV3(httplib.HTTPSConnection): @@ -409,22 +415,12 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs): def https_open(self, req): return self.do_open(HTTPSConnectionV3, req) return HTTPSHandlerV3(**kwargs) - elif hasattr(ssl, 'create_default_context'): # Python >= 3.4 - context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) - context.options &= ~ssl.OP_NO_SSLv3 # Allow older, not-as-secure SSLv3 - if opts_no_check_certificate: - context.verify_mode = ssl.CERT_NONE - return compat_urllib_request.HTTPSHandler(context=context, **kwargs) else: # Python < 3.4 context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) context.verify_mode = (ssl.CERT_NONE if opts_no_check_certificate else ssl.CERT_REQUIRED) context.set_default_verify_paths() - try: - context.load_default_certs() - except AttributeError: - pass # Python < 3.4 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)