Fix imports and general cleanup
author Philipp Hagemeister <phihag@phihag.de>
Sat, 13 Dec 2014 11:24:42 +0000 (12:24 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Sat, 13 Dec 2014 11:35:45 +0000 (12:35 +0100)
· Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail.
· Use _match_id consistently whenever possible
· Fix some outdated tests
· Use consistent _VALID_URL patterns (always match the whole protocol; no ^ at the start is required)
· Use modern test definitions

120 files changed:
youtube_dl/downloader/common.py
youtube_dl/downloader/f4m.py
youtube_dl/downloader/hls.py
youtube_dl/downloader/http.py
youtube_dl/downloader/rtmp.py
youtube_dl/extractor/allocine.py
youtube_dl/extractor/aol.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/auengine.py
youtube_dl/extractor/bambuser.py
youtube_dl/extractor/bandcamp.py
youtube_dl/extractor/bet.py
youtube_dl/extractor/bilibili.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/condenast.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/daum.py
youtube_dl/extractor/ehow.py
youtube_dl/extractor/eighttracks.py
youtube_dl/extractor/escapist.py
youtube_dl/extractor/everyonesmixtape.py
youtube_dl/extractor/extremetube.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/firedrive.py
youtube_dl/extractor/fourtube.py
youtube_dl/extractor/franceculture.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/gdcvault.py
youtube_dl/extractor/golem.py
youtube_dl/extractor/googlesearch.py
youtube_dl/extractor/gorillavid.py
youtube_dl/extractor/hostingbulk.py
youtube_dl/extractor/hypem.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/infoq.py
youtube_dl/extractor/internetvideoarchive.py
youtube_dl/extractor/iprima.py
youtube_dl/extractor/ivi.py
youtube_dl/extractor/keezmovies.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/malemotion.py
youtube_dl/extractor/metacafe.py
youtube_dl/extractor/mit.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/moevideo.py
youtube_dl/extractor/mofosex.py
youtube_dl/extractor/moniker.py
youtube_dl/extractor/mooshare.py
youtube_dl/extractor/motorsport.py
youtube_dl/extractor/movieclips.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/myspass.py
youtube_dl/extractor/naver.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/nfb.py
youtube_dl/extractor/nfl.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/nosvideo.py
youtube_dl/extractor/novamov.py
youtube_dl/extractor/nuvid.py
youtube_dl/extractor/photobucket.py
youtube_dl/extractor/played.py
youtube_dl/extractor/playfm.py
youtube_dl/extractor/playvid.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/promptfile.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/quickvid.py
youtube_dl/extractor/rai.py
youtube_dl/extractor/rts.py
youtube_dl/extractor/rutube.py
youtube_dl/extractor/screencast.py
youtube_dl/extractor/shared.py
youtube_dl/extractor/sharesix.py
youtube_dl/extractor/sina.py
youtube_dl/extractor/slideshare.py
youtube_dl/extractor/smotri.py
youtube_dl/extractor/sockshare.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/spankwire.py
youtube_dl/extractor/sportdeutschland.py
youtube_dl/extractor/streamcloud.py
youtube_dl/extractor/streamcz.py
youtube_dl/extractor/tapely.py
youtube_dl/extractor/ted.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/tlc.py
youtube_dl/extractor/tube8.py
youtube_dl/extractor/tutv.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/udemy.py
youtube_dl/extractor/urort.py
youtube_dl/extractor/ustream.py
youtube_dl/extractor/vbox7.py
youtube_dl/extractor/veehd.py
youtube_dl/extractor/veoh.py
youtube_dl/extractor/vevo.py
youtube_dl/extractor/videodetective.py
youtube_dl/extractor/videomega.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/vodlocker.py
youtube_dl/extractor/vube.py
youtube_dl/extractor/vuclip.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/wistia.py
youtube_dl/extractor/xbef.py
youtube_dl/extractor/xnxx.py
youtube_dl/extractor/xtube.py
youtube_dl/extractor/xvideos.py
youtube_dl/extractor/yahoo.py
youtube_dl/extractor/ynet.py
youtube_dl/extractor/youporn.py
youtube_dl/swfinterp.py

index 2a566eabe5ecb564ff1f102dd100c1798a26d436..8181bca093c227d1f11b1c2d91d61f026749a731 100644 (file)
@@ -5,8 +5,8 @@ import re
 import sys
 import time
 
+from ..compat import compat_str
 from ..utils import (
-    compat_str,
     encodeFilename,
     format_bytes,
     timeconvert,
index 7cd22c504e463ad2551692728bd3933e8bcf20ab..00f3a026c8497953059426e52a6e16c9b0813186 100644 (file)
@@ -9,10 +9,12 @@ import xml.etree.ElementTree as etree
 
 from .common import FileDownloader
 from .http import HttpFD
+from ..compat import (
+    compat_urlparse,
+)
 from ..utils import (
     struct_pack,
     struct_unpack,
-    compat_urlparse,
     format_bytes,
     encodeFilename,
     sanitize_open,
index ad26cfa4085bbb028c7252aa9db2a8de3f7bd1e4..5bb0f3cfd19632f126d4f7d7b0df407b608778fc 100644 (file)
@@ -6,9 +6,11 @@ import subprocess
 
 from ..postprocessor.ffmpeg import FFmpegPostProcessor
 from .common import FileDownloader
-from ..utils import (
+from ..compat import (
     compat_urlparse,
     compat_urllib_request,
+)
+from ..utils import (
     check_executable,
     encodeFilename,
 )
index 224962e86827956b2145378b7a6cf920a02029e6..e68f20c9f46a93ebfeca2ff47dc0843f4ab94874 100644 (file)
@@ -4,11 +4,12 @@ import os
 import time
 
 from .common import FileDownloader
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
     compat_urllib_error,
+)
+from ..utils import (
     ContentTooShortError,
-
     encodeFilename,
     sanitize_open,
     format_bytes,
index 58ae2005c014eb4aafbd68e087d5018d26d02a21..5759126751c3628da461136c4439f7e9985027d2 100644 (file)
@@ -7,9 +7,9 @@ import sys
 import time
 
 from .common import FileDownloader
+from ..compat import compat_str
 from ..utils import (
     check_executable,
-    compat_str,
     encodeFilename,
     format_bytes,
     get_exe_version,
index 398e93bfb4f8472a23d2b8669e7f83f867933244..623aeaf3490c30f6fb2a26753a0e1cace3e4a560 100644 (file)
@@ -5,10 +5,9 @@ import re
 import json
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
-    compat_str,
     qualities,
-    determine_ext,
 )
 
 
@@ -75,9 +74,7 @@ class AllocineIE(InfoExtractor):
                     'format_id': format_id,
                     'quality': quality(format_id),
                     'url': v,
-                    'ext': determine_ext(v),
                 })
-
         self._sort_formats(formats)
 
         return {
index 185ee3693e7f3cb3ec61182a4a7f9dd461ffc2cf..b51eafc45928f8e6ff4ce571763593f71b715583 100644 (file)
@@ -68,4 +68,3 @@ class AolIE(InfoExtractor):
             'title': title,
             'entries': entries,
         }
-
index 0c01fa1a13ffa6fbfbfe7b7fb2283d5ed4f8b70f..7cd0482c75d7157df218071a2e22ce2904d094b6 100644 (file)
@@ -4,8 +4,8 @@ import re
 import json
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
-    compat_urlparse,
     int_or_none,
 )
 
index 1c765532a00d9274c2531277cc1ad81b75053dfd..014a219522d5de5ab85415cb8aeca0a93561a409 100644 (file)
@@ -3,8 +3,8 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse
 from ..utils import (
-    compat_urllib_parse,
     determine_ext,
     ExtractorError,
 )
index 1ca0b7cf2bf78717fd45a11d17e3cce7e5191b9b..98e1443ab0c3d380737f34be2c67fa760e08a221 100644 (file)
@@ -5,7 +5,7 @@ import json
 import itertools
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
 )
 
index acddbc8f1d19ebc48b721b3867b98fc30af3c133..9fb770cb1562f6efbe004d4b1689bf3aa1eab8d4 100644 (file)
@@ -4,9 +4,11 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
     compat_urlparse,
+)
+from ..utils import (
     ExtractorError,
 )
 
index c1fc433f77d7aec1dc80d2b7074865d02484a40b..003e50002c76e403e6a60defaafcf7595dd40052 100644 (file)
@@ -1,8 +1,8 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse
 from ..utils import (
-    compat_urllib_parse,
     xpath_text,
     xpath_with_ns,
     int_or_none,
index 0d5889f5d17c17ffa75eeca1f1079efd7f9c2b8f..241b904a9e57f7cc3e61b6f086550578feb71b05 100644 (file)
@@ -4,8 +4,8 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_parse_qs
 from ..utils import (
-    compat_parse_qs,
     ExtractorError,
     int_or_none,
     unified_strdate,
@@ -29,10 +29,9 @@ class BiliBiliIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         video_code = self._search_regex(
             r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
 
index bf18a97e04e59894a08f500244ec00a05b6a5306..1eca0047076f422bb77329e5b83657df44002738 100644 (file)
@@ -6,20 +6,21 @@ import json
 import xml.etree.ElementTree
 
 from .common import InfoExtractor
-from ..utils import (
-    compat_urllib_parse,
-    find_xpath_attr,
-    fix_xml_ampersands,
-    compat_urlparse,
-    compat_str,
-    compat_urllib_request,
+from ..compat import (
     compat_parse_qs,
+    compat_str,
+    compat_urllib_parse,
     compat_urllib_parse_urlparse,
-
+    compat_urllib_request,
+    compat_urlparse,
+)
+from ..utils import (
     determine_ext,
     ExtractorError,
-    unsmuggle_url,
+    find_xpath_attr,
+    fix_xml_ampersands,
     unescapeHTML,
+    unsmuggle_url,
 )
 
 
index 97feb6704075831fb8b5ef95a547428ecc57ec3f..2f866f3ef925c8402f00a3c0f922cf530eaa2010 100644 (file)
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
     compat_urllib_parse_urlparse,
+)
+from ..utils import (
     ExtractorError,
 )
 
index 2e3ef3fdab4c25f2818f46a820702056ceb3f294..48e2410b621f64896ebe769d36cce722260db532 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .mtv import MTVServicesInfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
     compat_urllib_parse,
+)
+from ..utils import (
     ExtractorError,
     float_or_none,
     unified_strdate,
index 7a7e79360423ec39e341cf651b2aef4ca762d244..3db4db4e4db816ae532060bc2386cd91a9c71a92 100644 (file)
@@ -5,12 +5,14 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
-    orderedSet,
     compat_urllib_parse_urlparse,
     compat_urlparse,
 )
+from ..utils import (
+    orderedSet,
+)
 
 
 class CondeNastIE(InfoExtractor):
index d7e2b841e10856cadf0526fe8ff6d4c280dc0dae..8f1ea02e74466ac4999356267eebedffe2f62daa 100644 (file)
@@ -10,10 +10,12 @@ import xml.etree.ElementTree
 from hashlib import sha1
 from math import pow, sqrt, floor
 from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
     bytes_to_intlist,
     intlist_to_bytes,
     unified_strdate,
index 936c13cd60b0ec44f376818a7e17cb9ccf4d1384..cf5841a7c6e92e115d7f685d8f7ce337a51cb92a 100644 (file)
@@ -8,13 +8,15 @@ import itertools
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 
-from ..utils import (
-    compat_urllib_request,
+from ..compat import (
     compat_str,
+    compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
     orderedSet,
     str_to_int,
-    int_or_none,
-    ExtractorError,
     unescapeHTML,
 )
 
index 45d66e2e663fa376cec8f4fc7931e84006ee30b9..c6b813f58ed73e9b08f2584f84288fa821e91688 100644 (file)
@@ -5,7 +5,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
index b766e17f26a9e79d654d4b160fa8f98f5f21503f..9cb1bf301b9ae3e327e4831bdb8a7d2437b43803 100644 (file)
@@ -1,8 +1,6 @@
 from __future__ import unicode_literals
 
-import re
-
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 from .common import InfoExtractor
@@ -24,11 +22,10 @@ class EHowIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
-                                       webpage, 'video URL')
+        video_url = self._search_regex(
+            r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
         final_url = compat_urllib_parse.unquote(video_url)
         uploader = self._html_search_meta('uploader', webpage)
         title = self._og_search_title(webpage).replace(' | eHow', '')
index f4c1e2a72bf74821afd476dee86b806c0dbb56c7..a30a1f3305ad9d2ba61552571accf23ec2625bff 100644 (file)
@@ -6,7 +6,7 @@ import random
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
 )
 
index 476fc22b93424b13255d5eec3578eb985dbfbdfd..e240cb8591ecc467c44d98742685740f4354cbda 100644 (file)
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
-
+)
+from ..utils import (
     ExtractorError,
 )
 
index d237a82813ea2556175e32a882d87bd5d1831924..d872d828fcc8e10fea4770e1e56ab21cda027336 100644 (file)
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
 )
 
index aacbf14141f6d5109d265b8e4dfa37883cee81ab..36ba331285b434136b8d3c10e6a8a16bef18e7b7 100644 (file)
@@ -3,16 +3,18 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
+)
+from ..utils import (
     str_to_int,
 )
 
 
 class ExtremeTubeIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
     _TESTS = [{
         'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
         'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
@@ -31,7 +33,7 @@ class ExtremeTubeIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')
         url = 'http://www.' + mobj.group('url')
 
         req = compat_urllib_request.Request(url)
index 6f5d23559b78dfb621bbf6d819612d20b96fd597..81ceace53289709b93d7c647f6627197320381ef 100644 (file)
@@ -1,19 +1,20 @@
 #! -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-import re
 import hashlib
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_request,
     compat_urlparse,
 )
+from ..utils import (
+    ExtractorError,
+)
 
 
 class FC2IE(InfoExtractor):
-    _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)'
+    _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
     IE_NAME = 'fc2'
     _TEST = {
         'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
@@ -26,9 +27,7 @@ class FC2IE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         self._downloader.cookiejar.clear_session_cookies()  # must clear
 
index af439ccfeefeade46f75b693627b09ba6ed830d6..3191116d96a0df0e61081fbc85e5745c815f1f99 100644 (file)
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
+from ..utils import (
+    ExtractorError,
+)
 
 
 class FiredriveIE(InfoExtractor):
@@ -28,11 +30,8 @@ class FiredriveIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         url = 'http://firedrive.com/file/%s' % video_id
-
         webpage = self._download_webpage(url, video_id)
 
         if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
index b22ce2acb5d1beae29d1b298b4ef63c8e7639e5f..7187e075291af16151e28b8041d7e06e49887fa7 100644 (file)
@@ -3,12 +3,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
-    unified_strdate,
-    str_to_int,
-    parse_duration,
+)
+from ..utils import (
     clean_html,
+    parse_duration,
+    str_to_int,
+    unified_strdate,
 )
 
 
@@ -31,9 +33,7 @@ class FourTubeIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage_url = 'http://www.4tube.com/videos/' + video_id
         webpage = self._download_webpage(webpage_url, video_id)
 
index 898e0dda780df7a83f91226216b7fce4d59818c2..0c29721629a25369621072e4f451e7decdc8df0b 100644 (file)
@@ -5,7 +5,7 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_parse_qs,
     compat_urlparse,
 )
index e0420a48f8cacb5661b6882f797c09755de4df46..bbc760a4990cac1b6cdb731c161d61c853a72729 100644 (file)
@@ -6,13 +6,15 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
+    compat_urllib_parse_urlparse,
     compat_urlparse,
-    ExtractorError,
+)
+from ..utils import (
     clean_html,
-    parse_duration,
-    compat_urllib_parse_urlparse,
+    ExtractorError,
     int_or_none,
+    parse_duration,
 )
 
 
index d570e3f6a85ca399d81328e3afedee4f98158e5f..47373e21540030d4c9a19dbfc1c5943f468fea4f 100644 (file)
@@ -4,9 +4,11 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urlparse,
+)
+from ..utils import (
     unescapeHTML,
 )
 
index de14ae1fb1edd0600488b8f04c7b400bf310ef5a..d453ec010937b1815bf3a22d568a70ce818224e6 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
index 53714f47f1a0a8cd1abb8aab0ec09cdbd283d51b..2bfb9904022c6a3830901baa2ee380b6f4f14714 100644 (file)
@@ -2,8 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
+)
+from ..utils import (
     determine_ext,
 )
 
index 469e1f9357eaf66ef48b3bed6c3d464c48f2b5dc..498304cb2bd9b605d44e67291a2f38bf4481a6f8 100644 (file)
@@ -4,7 +4,7 @@ import itertools
 import re
 
 from .common import SearchInfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
index 1ac1da8569e20b84c2baf38011488d6304a090cb..ae24aff84fd85c6796c7a4374964f70629175f43 100644 (file)
@@ -4,11 +4,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    determine_ext,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
     int_or_none,
 )
 
@@ -106,7 +107,6 @@ class GorillaVidIE(InfoExtractor):
         formats = [{
             'format_id': 'sd',
             'url': video_url,
-            'ext': determine_ext(video_url),
             'quality': 1,
         }]
 
index 8e812b66976e31e43ad594dbee6344c5e34629cf..704d0285d3e1c2ce10e8f3929543c6c66b0fd58a 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
     ExtractorError,
-    compat_urllib_request,
     int_or_none,
     urlencode_postdata,
 )
@@ -30,9 +32,7 @@ class HostingBulkIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         url = 'http://hostingbulk.com/{0:}.html'.format(video_id)
 
         # Custom request with cookie to set language to English, so our file
index 6d0d847c6d3461a02c6eab71b24848247e9678ab..aa0724a02353840e5f5533a1eedbc7005aa63008 100644 (file)
@@ -1,20 +1,20 @@
 from __future__ import unicode_literals
 
 import json
-import re
 import time
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
-
+)
+from ..utils import (
     ExtractorError,
 )
 
 
 class HypemIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
+    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
     _TEST = {
         'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
         'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
@@ -27,8 +27,7 @@ class HypemIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        track_id = mobj.group(1)
+        track_id = self._match_id(url)
 
         data = {'ax': 1, 'ts': time.time()}
         data_encoded = compat_urllib_parse.urlencode(data)
index f2c1c10f5c1dec44129ea4cd7d4cff69c7c07206..13a53a0cb39f70ed1aaf1713283852cdc3cebeb4 100644 (file)
@@ -4,7 +4,7 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
 )
 
index e76dd222d1ee81dc0e0b2d5b1b3c28ef22e1bd83..f25f43664e262b25473557c5f11dae91e697e3f6 100644 (file)
@@ -1,10 +1,9 @@
 from __future__ import unicode_literals
 
 import base64
-import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
@@ -24,9 +23,7 @@ class InfoQIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
index 1e47991874ecf6afa75181f99b6bf98a8dd60916..c813d4b82921b7598ad0981e70a1241a8f9bda32 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
     compat_urllib_parse,
+)
+from ..utils import (
     xpath_with_ns,
 )
 
index 4247d6391fa25f674449d9d8ac44b428c7c387e0..8529bedfc0ab283790e74144bc9d570df19dc4b3 100644 (file)
@@ -6,8 +6,10 @@ from random import random
 from math import floor
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
 )
 
index f0fba1adb7dba9c4c09717132731d98ff0e5ffd3..7a400323dc4df3807057a77b25f7401ce5e2a3b8 100644 (file)
@@ -5,8 +5,10 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
 )
 
index 75b63cffb5961f33ea2d2f5ae37803dfb0fe37fc..97dcb518a3587406bc93a44c39344630cafe7119 100644 (file)
@@ -4,7 +4,7 @@ import os
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
@@ -15,7 +15,7 @@ from ..aes import (
 
 
 class KeezMoviesIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)'
+    _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
     _TEST = {
         'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
         'file': '1214711.mp4',
@@ -27,8 +27,7 @@ class KeezMoviesIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = self._match_id(url)
 
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'age_verified=1')
index 03c4691c6abb6794557a7d83b30fce49a2a2e4e7..5247c6f58500e301dab50ed48039df0c070b493a 100644 (file)
@@ -4,10 +4,12 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
     compat_urllib_parse_urlparse,
     compat_urlparse,
+)
+from ..utils import (
     ExtractorError,
     find_xpath_attr,
     int_or_none,
index 2160d6cb08ae5b71584ffdc0d76982e2a9bdf0c0..26e84970d49463068f032dcf05afbc03e485e859 100644 (file)
@@ -5,12 +5,14 @@ import json
 
 from .subtitles import SubtitlesInfoExtractor
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
+    compat_str,
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
     int_or_none,
-    compat_str,
 )
 
 
index 1abf6e4f85d52cbe4d257280cf26277e715dbdb1..0b85a59d1c644d7d04e573aae0bdd03ebd4f6c80 100644 (file)
@@ -1,43 +1,33 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
 
 class MalemotionIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
+    _VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
     _TEST = {
-        'url': 'http://malemotion.com/video/bien-dur.10ew',
-        'file': '10ew.mp4',
-        'md5': 'b3cc49f953b107e4a363cdff07d100ce',
+        'url': 'http://malemotion.com/video/bete-de-concours.ltc',
+        'md5': '3013e53a0afbde2878bc39998c33e8a5',
         'info_dict': {
-            "title": "Bien dur",
-            "age_limit": 18,
+            'id': 'ltc',
+            'ext': 'mp4',
+            'title': 'Bête de Concours',
+            'age_limit': 18,
         },
-        'skip': 'This video has been deleted.'
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group("id")
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        self.report_extraction(video_id)
-
-        # Extract video URL
-        video_url = compat_urllib_parse.unquote(
-            self._search_regex(r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
-
-        # Extract title
+        video_url = compat_urllib_parse.unquote(self._search_regex(
+            r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
         video_title = self._html_search_regex(
             r'<title>(.*?)</title', webpage, 'title')
-
-        # Extract video thumbnail
         video_thumbnail = self._search_regex(
             r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
 
@@ -47,14 +37,12 @@ class MalemotionIE(InfoExtractor):
             'format_id': 'mp4',
             'preference': 1,
         }]
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'formats': formats,
-            'uploader': None,
-            'upload_date': None,
             'title': video_title,
             'thumbnail': video_thumbnail,
-            'description': None,
             'age_limit': 18,
         }
index 858c1c0c31f4c08c3068a62983781129288dc3b8..8bc333b0277e27e6fd8f3d4f11b3c9c7eabdd7d7 100644 (file)
@@ -3,10 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_parse_qs,
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
index 807b1dc89b608333e06c1fbab2e9d806fb7d090f..d354702d1d5753a1cee6976c497edc9177e91ef6 100644 (file)
@@ -5,8 +5,10 @@ import json
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
-from ..utils import (
+from ..compat import (
     compat_urlparse,
+)
+from ..utils import (
     clean_html,
     ExtractorError,
     get_element_by_id,
index 6691521e58435682a74af87559ce1d1fd9046fbf..2567583235617e52b6420419863dbc8d319c8201 100644 (file)
@@ -1,12 +1,13 @@
 from __future__ import unicode_literals
 
-import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urlparse,
+)
+from ..utils import (
     get_element_by_attribute,
     parse_duration,
     strip_jsonp,
@@ -15,7 +16,7 @@ from ..utils import (
 
 class MiTeleIE(InfoExtractor):
     IE_NAME = 'mitele.es'
-    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'
+    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
 
     _TEST = {
         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
@@ -31,12 +32,10 @@ class MiTeleIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        episode = mobj.group('episode')
+        episode = self._match_id(url)
         webpage = self._download_webpage(url, episode)
         embed_data_json = self._search_regex(
-            r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
-            flags=re.DOTALL
+            r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
         ).replace('\'', '"')
         embed_data = json.loads(embed_data_json)
 
index 55cc33a3e4b94014f9d4642eff1d1a3d6dc26d05..07d194562e77044a8d8d87138ed32205842a1a25 100644 (file)
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
+)
+from ..utils import (
     ExtractorError,
     HEADRequest,
     int_or_none,
index 2ff79b9b88590e87f1aecf9fdfd32f242bd98420..184f9c2c9e216a47b635680ba928f1c3c403619d 100644 (file)
@@ -5,10 +5,12 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
     int_or_none,
 )
 
index d658647e6ca6d9b7675dd76ea55c58f52887374d..2cec12d35ec1797dd7612ad49c5739e87f77e6c9 100644 (file)
@@ -4,7 +4,7 @@ import os
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class MofosexIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<id>[0-9]+)/.*?\.html)'
     _TEST = {
         'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
         'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
@@ -26,7 +26,7 @@ class MofosexIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')
         url = 'http://www.' + mobj.group('url')
 
         req = compat_urllib_request.Request(url)
index 1c4f589cce1605b17e099d47c050097fef1ad0a9..5de719bdc41d2af56d6133a85b998c4ed85af726 100644 (file)
@@ -5,7 +5,7 @@ import os.path
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
@@ -37,10 +37,9 @@ class MonikerIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         orig_webpage = self._download_webpage(url, video_id)
+
         fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
         data = dict(fields)
 
index 34a4bec3a0d1fb91208acce4684a4078188b847d..9f2853fa32a031bae5f4d5fab4ee9cab93dbafc8 100644 (file)
@@ -4,11 +4,13 @@ import re
 import time
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
 )
+from ..utils import (
+    ExtractorError,
+)
 
 
 class MooshareIE(InfoExtractor):
@@ -43,9 +45,7 @@ class MooshareIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         page = self._download_webpage(url, video_id, 'Downloading page')
 
         if re.search(r'>Video Not Found or Deleted<', page) is not None:
index 7c0ec6a127e97dca1068db9db740954e851a8447..f5ca74e976bc10ff896bf7e6134a14332c7b131c 100644 (file)
@@ -3,13 +3,14 @@ from __future__ import unicode_literals
 
 import hashlib
 import json
-import re
 import time
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_parse_qs,
     compat_str,
+)
+from ..utils import (
     int_or_none,
 )
 
@@ -32,10 +33,9 @@ class MotorsportIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('id')
-
+        display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
+
         flashvars_code = self._html_search_regex(
             r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
         flashvars = compat_parse_qs(flashvars_code)
index 456807dd1c4487332a4e0006448074010e86117b..04e17d0551c7a46feff1822c4dc4be38d00cc520 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+)
 from ..utils import (
     ExtractorError,
-    compat_str,
     clean_html,
 )
 
index b482d6d4dfb09416bc46ef43ac21dff8bf2d5744..5ebc78033a4abbb98310096c279fe11459b4a791 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
index 51e540814be209856a9a71f891e55eeb4ba559c2..5b9b9fbcd0844897d6d63305ed00729e70c7f4fb 100644 (file)
@@ -2,9 +2,10 @@ from __future__ import unicode_literals
 import os.path
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
-
+)
+from ..utils import (
     ExtractorError,
 )
 
index fbe34defd868694d44f4371825322a41b39019a3..c10405f04d3cc1b3e89004029b7502112e9baa29 100644 (file)
@@ -4,8 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
+)
+from ..utils import (
     ExtractorError,
     clean_html,
 )
@@ -26,9 +28,9 @@ class NaverIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
                          webpage)
         if m_id is None:
index bf5132721ecfe03d5ced4e637c512fe23ff6791c..690c46b6a57be11edf36899b959318af5e482119 100644 (file)
@@ -4,8 +4,10 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
+)
+from ..utils import (
     ExtractorError,
     find_xpath_attr,
 )
index 7ce1d481d0a73218598bf24fee964ca8ec65956d..ea077254b4320fe18e59eb9b67461b13c146b873 100644 (file)
@@ -1,9 +1,7 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
 )
@@ -12,7 +10,7 @@ from ..utils import (
 class NFBIE(InfoExtractor):
     IE_NAME = 'nfb'
     IE_DESC = 'National Film Board of Canada'
-    _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
 
     _TEST = {
         'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
@@ -32,10 +30,10 @@ class NFBIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
+        video_id = self._match_id(url)
+        page = self._download_webpage(
+            'https://www.nfb.ca/film/%s' % video_id, video_id,
+            'Downloading film page')
 
         uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
                                               page, 'director id', fatal=False)
index cc7c921c364d64ee504fa6d31265d13a96565e8d..606e2294efb716cfe755d1b9564357dbda7f9039 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
     ExtractorError,
-    compat_urllib_parse_urlparse,
     int_or_none,
     remove_end,
 )
index 1d9c1a096403e7e7b4f3835f31dbee31812594c9..4c18904169d3f69a0bf7e95fb21d98218bca7e91 100644 (file)
@@ -5,14 +5,16 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
-    unified_strdate,
-    parse_duration,
-    int_or_none,
+)
+from ..utils import (
     ExtractorError,
+    int_or_none,
+    parse_duration,
+    unified_strdate,
 )
 
 
index 7d2ff7b9a149d0284da1d52a26f4c0a85bf0f6ce..251e6da07457b7e7be6b5703b5769214ae299c3d 100644 (file)
@@ -6,13 +6,15 @@ import time
 import hashlib
 
 from .common import InfoExtractor
-from ..utils import (
-    compat_urllib_request,
+from ..compat import (
+    compat_str,
     compat_urllib_parse,
-    ExtractorError,
+    compat_urllib_request,
+)
+from ..utils import (
     clean_html,
+    ExtractorError,
     unified_strdate,
-    compat_str,
 )
 
 
index f3be8f552c3764995057acf18b74514537960d4e..f5ef856db0155dd84f10d5db4a8cef8e6c08213c 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
     ExtractorError,
-    compat_urllib_request,
     urlencode_postdata,
     xpath_text,
     xpath_with_ns,
@@ -32,8 +34,7 @@ class NosVideoIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         fields = {
             'id': video_id,
index 38d05e46604a859247c0b155625ee41f5b556b36..04d779890af1960d65b070d0b2f80e429db21d07 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urlparse,
+)
 from ..utils import (
     ExtractorError,
-    compat_urlparse
 )
 
 
index 449c8a6a3e86c410daaafbf80878161693242e27..57928f2aedcc0acfa5ba71d6e9f0a62af9d67b71 100644 (file)
@@ -3,15 +3,17 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
     parse_duration,
     unified_strdate,
-    compat_urllib_request,
 )
 
 
 class NuvidIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://m.nuvid.com/video/1310741/',
         'md5': 'eab207b7ac4fccfb4e23c86201f11277',
@@ -26,8 +28,7 @@ class NuvidIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         formats = []
 
index b4389e0b6feaf0726a4805bec674b77cd38e295b..c66db3cdc84e55a6a3a904ddf3ff7c09aaac9573 100644 (file)
@@ -4,16 +4,17 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import compat_urllib_parse
+from ..compat import compat_urllib_parse
 
 
 class PhotobucketIE(InfoExtractor):
     _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
     _TEST = {
         'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
-        'file': 'zpsc0c3b9fa.mp4',
         'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
         'info_dict': {
+            'id': 'zpsc0c3b9fa',
+            'ext': 'mp4',
             'timestamp': 1367669341,
             'upload_date': '20130504',
             'uploader': 'rachaneronas',
index 17880471d9d160f6d3315ca9c6eadeada8ce91a7..449d4836c3f7c0c8ce220cbcbd447e76cb17ea0f 100644 (file)
@@ -5,11 +5,13 @@ import re
 import os.path
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
+from ..utils import (
+    ExtractorError,
+)
 
 
 class PlayedIE(InfoExtractor):
@@ -28,7 +30,6 @@ class PlayedIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         orig_webpage = self._download_webpage(url, video_id)
 
         m_error = re.search(
index ebc0468042a22c2bccdfb5b7e45861c0bc45f61c..9576aed0e6668189c1959df3166b1e550facc7b0 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
index cd3905acb0fcd0ef9fe08beec2bda41fd2f94f70..c3e667e9e72ea0aaf6e5db731f630816e6a2861d 100644 (file)
@@ -3,31 +3,31 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+)
 from ..utils import (
-    ExtractorError,
     clean_html,
-    compat_urllib_parse,
+    ExtractorError,
 )
 
 
 class PlayvidIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
+    _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
     _TEST = {
-        'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
-        'md5': '44930f8afa616efdf9482daf4fe53e1e',
+        'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
+        'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
         'info_dict': {
-            'id': 'agbDDi7WZTV',
+            'id': 'RnmBNgtrrJu',
             'ext': 'mp4',
-            'title': 'Michelle Lewin in Miami Beach',
-            'duration': 240,
+            'title': 'md5:9256d01c6317e3f703848b5906880dc8',
+            'duration': 82,
             'age_limit': 18,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         m_error = re.search(
index 2ca15b717ec5dd9a36b6aa2bfc7e9019148c0493..634142d0d27300eb82ea2f460fd2163a20208709 100644 (file)
@@ -4,10 +4,12 @@ import os
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
+    compat_urllib_parse,
     compat_urllib_parse_urlparse,
     compat_urllib_request,
-    compat_urllib_parse,
+)
+from ..utils import (
     str_to_int,
 )
 from ..aes import (
@@ -16,7 +18,7 @@ from ..aes import (
 
 
 class PornHubIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
     _TEST = {
         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
         'md5': '882f488fa1f0026f023f33576004a2ed',
index 7fcde086c0b234f7020cfa9811425d22355808a3..f536e6e6cdfb3d71e21c98614e2baf117387493b 100644 (file)
@@ -4,12 +4,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    determine_ext,
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+)
 
 
 class PromptFileIE(InfoExtractor):
index 1262793c820f335dfe940eafb224495af168495e..385681d06e3dda356193d9f89c7ccbdd4cbde453 100644 (file)
@@ -5,8 +5,10 @@ import re
 
 from hashlib import sha1
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
+)
+from ..utils import (
     unified_strdate,
 )
 
index 3bc78060de3b0b4e9051c4ab07e74dc7d7ca5c9c..af7d76cf47e575277de3ffe480fdb8e1eb48b43c 100644 (file)
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
+)
+from ..utils import (
     determine_ext,
     int_or_none,
 )
index 2d39ecfe4faa537cb86f156df930dc2b19241a0b..aa26b7e0bb0f4f0a489ad4cfdef330c704747680 100644 (file)
@@ -3,10 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .subtitles import SubtitlesInfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+)
 from ..utils import (
     parse_duration,
     unified_strdate,
-    compat_urllib_parse,
 )
 
 
index dc59a5e5c0da8c6e2001b411a03451604ca190f3..5e84c109802e34ce8f57496ee3b7e2cd409c0788 100644 (file)
@@ -4,12 +4,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+)
 from ..utils import (
     int_or_none,
     parse_duration,
     parse_iso8601,
     unescapeHTML,
-    compat_str,
 )
 
 
index 6941d96fb52d23246656fbdd8afb892c32fcc7dd..b72b5a5869ae3ae6044ac210d55eca9df8a5ccf8 100644 (file)
@@ -5,10 +5,12 @@ import re
 import itertools
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
-    unified_strdate,
+)
+from ..utils import (
     ExtractorError,
+    unified_strdate,
 )
 
 
@@ -36,9 +38,7 @@ class RutubeIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         video = self._download_json(
             'http://rutube.ru/api/video/%s/?format=json' % video_id,
             video_id, 'Downloading video JSON')
@@ -114,8 +114,7 @@ class RutubeMovieIE(RutubeChannelIE):
     _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        movie_id = mobj.group('id')
+        movie_id = self._match_id(url)
         movie = self._download_json(
             self._MOVIE_TEMPLATE % movie_id, movie_id,
             'Downloading movie JSON')
index c145f6fc72f1b9eed8a5089dce48dfdd5f106a79..dfd897ba3a3f0a7297164fb315e4543bb597d678 100644 (file)
@@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_parse_qs,
     compat_urllib_request,
 )
+from ..utils import (
+    ExtractorError,
+)
 
 
 class ScreencastIE(InfoExtractor):
@@ -57,8 +57,7 @@ class ScreencastIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_url = self._html_search_regex(
index fdc31603a709676713f7ba87325f4b2ba6b47f3e..26ced716e8a875f1c4c5c9527b856475dce83f9e 100644 (file)
@@ -4,10 +4,12 @@ import re
 import base64
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
 from ..utils import (
     ExtractorError,
-    compat_urllib_request,
-    compat_urllib_parse,
     int_or_none,
 )
 
@@ -26,26 +28,30 @@ class SharedIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        page = self._download_webpage(url, video_id)
-
-        if re.search(r'>File does not exist<', page) is not None:
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
 
-        download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))
+        if '>File does not exist<' in webpage:
+            raise ExtractorError(
+                'Video %s does not exist' % video_id, expected=True)
 
-        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form))
+        download_form = dict(re.findall(
+            r'<input type="hidden" name="([^"]+)" value="([^"]*)"', webpage))
+        request = compat_urllib_request.Request(
+            url, compat_urllib_parse.urlencode(download_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 
-        video_page = self._download_webpage(request, video_id, 'Downloading video page')
+        video_page = self._download_webpage(
+            request, video_id, 'Downloading video page')
 
-        video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL')
-        title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8')
-        filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False))
+        video_url = self._html_search_regex(
+            r'data-url="([^"]+)"', video_page, 'video URL')
+        title = base64.b64decode(self._html_search_meta(
+            'full:title', webpage, 'title')).decode('utf-8')
+        filesize = int_or_none(self._html_search_meta(
+            'full:size', webpage, 'file size', fatal=False))
         thumbnail = self._html_search_regex(
-            r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None)
+            r'data-poster="([^"]+)"', video_page, 'thumbnail', default=None)
 
         return {
             'id': video_id,
index 7531e8325bf88e3d89958dca1107334c41b78c6c..ac3e3adf22ad194a8af3e833ae4d8acf7484e8b4 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     parse_duration,
 )
 
index 5eadbb7eaea263b8a37307fbdcc01c3e54c5eaa2..a63d126d4560dda83133fa6280116ca517e71bdc 100644 (file)
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
 )
index 5864b9936cca2e4d0ba3a0fa217884c21f897ed7..e7d776e7bd8bd3334ff0da1203cd91d52508a6ef 100644 (file)
@@ -4,8 +4,10 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
+)
+from ..utils import (
     ExtractorError,
 )
 
index 646af3cc9c9686b7d09fdc87b305c0b7c6c0f8ce..d031fe40167f7ff704c26fde3972d4fa45f1b3b5 100644 (file)
@@ -7,9 +7,11 @@ import hashlib
 import uuid
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
     int_or_none,
     unified_strdate,
index c663e56d42ed02645313637cd7866a9071d10ae7..7d3c0e93783afeac3d8e939e0cf317177df4ca9f 100644 (file)
@@ -1,13 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from ..utils import (
-    ExtractorError,
+import re
+
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     determine_ext,
+    ExtractorError,
 )
-import re
 
 from .common import InfoExtractor
 
@@ -27,9 +30,7 @@ class SockshareIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         url = 'http://sockshare.com/file/%s' % video_id
         webpage = self._download_webpage(url, video_id)
 
index ab9483d2dc0136258084a794fd6945680d2aae47..5d60c4939588ad543840b501ef0e552ad0b1e673 100644 (file)
@@ -5,11 +5,12 @@ import re
 import itertools
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
     compat_urlparse,
     compat_urllib_parse,
-
+)
+from ..utils import (
     ExtractorError,
     int_or_none,
     unified_strdate,
index 94602e89e56549243ed38ecb107ef842cd8ebd46..b936202f6f3005fe9ae085724566d709c6a484cc 100644 (file)
@@ -3,12 +3,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
+    compat_urllib_parse,
     compat_urllib_parse_urlparse,
     compat_urllib_request,
-    compat_urllib_parse,
-    unified_strdate,
+)
+from ..utils import (
     str_to_int,
+    unified_strdate,
 )
 from ..aes import aes_decrypt_text
 
index 057ef5251dc6855c32a8df5abed6917556d190e9..2f57f5b7c76944589b3b8f091849604d9d4e3bf0 100644 (file)
@@ -4,8 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     parse_iso8601,
 )
 
index c1178f26de0b961ad68eb6d1ddb89550746f4dd7..38176498dd05aeea99e45379984f31fdec994d5a 100644 (file)
@@ -5,7 +5,7 @@ import re
 import time
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
index 77870483998d90baa29a8ca03ae9ce8a0783e497..c3ceb5f76d450001affda86e79466607b677e8f5 100644 (file)
@@ -1,13 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-import re
-import json
-
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-)
 from ..utils import (
     int_or_none,
 )
index 283e11350b212db0c857f1ccdb8982519a78cfbb..f1f43d0a7113cbf40e5dfd3ffb71af5e900fab78 100644 (file)
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
-    ExtractorError,
     clean_html,
-    compat_urllib_request,
+    ExtractorError,
     float_or_none,
     parse_iso8601,
 )
index 72160503ccd52e2f84e843184fbdf0e92080a19f..944177426d5d719d152d3474f5b059410cb27955 100644 (file)
@@ -5,7 +5,7 @@ import re
 
 from .subtitles import SubtitlesInfoExtractor
 
-from ..utils import (
+from ..compat import (
     compat_str,
 )
 
index e2653d62dc8c288ce8e58e5bfda52793aef7cfaf..af6ef0033af061713d81785734381a6beffc2c6d 100644 (file)
@@ -4,8 +4,10 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_str,
+)
+from ..utils import (
     determine_ext,
     ExtractorError,
     xpath_with_ns,
index 66d159e99f6b15c01d53017b24b0a70b57470bd3..9f9e388c50948d658d1022f8514122643b623a03 100644 (file)
@@ -5,7 +5,7 @@ import re
 from .common import InfoExtractor
 from .brightcove import BrightcoveIE
 from .discovery import DiscoveryIE
-from ..utils import compat_urlparse
+from ..compat import compat_urlparse
 
 
 class TlcIE(DiscoveryIE):
index 64a1e903022a78fa3a2b15eeff5eed20afce568d..d73ad3762a1b455cfd4bc384c27e2dd85e776dde 100644 (file)
@@ -4,9 +4,11 @@ import json
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
+)
+from ..utils import (
     int_or_none,
     str_to_int,
 )
index d516b6427bd271fa8f7e1129cdbbcd9dda692ae1..4de0aac523313eced334aab38a9a20c7bf08dfc7 100644 (file)
@@ -1,10 +1,9 @@
 from __future__ import unicode_literals
 
 import base64
-import re
 
 from .common import InfoExtractor
-from ..utils import compat_parse_qs
+from ..compat import compat_parse_qs
 
 
 class TutvIE(InfoExtractor):
@@ -20,10 +19,9 @@ class TutvIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
 
         data_content = self._download_webpage(
index 397d167e89cef763d977b657f2998ae6a363dec3..715f9930c4d4a7df315968b37ab3051802db7a7c 100644 (file)
@@ -5,9 +5,11 @@ import itertools
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
     parse_iso8601,
 )
index 5271611ac9f883af6a63e371b919c68398adaca0..4667ed83b71f4aec5f081741834e2c9cca010e82 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
 )
 
index 249a34c86dba694881c8672889cb77cf1910853c..8872cfcb2795ab0bfb9db1ad5418eb61dd0dffc6 100644 (file)
@@ -1,11 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
+)
+from ..utils import (
     unified_strdate,
 )
 
index 53dc3a496ff65edf044137540080d9190ad8d72b..68d03b99905cce848eb38fde8b6d8e643c548105 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
 )
 
index 455b6d9da62f221cf0854655f707d5963546840f..dd026748dcbb536f9f49181b0d211bf0a9157777 100644 (file)
@@ -1,19 +1,18 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
-
+)
+from ..utils import (
     ExtractorError,
 )
 
 
 class Vbox7IE(InfoExtractor):
-    _VALID_URL = r'http://(www\.)?vbox7\.com/play:(?P<id>[^/]+)'
+    _VALID_URL = r'http://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'
     _TEST = {
         'url': 'http://vbox7.com/play:249bb972c2',
         'md5': '99f65c0c9ef9b682b97313e052734c3f',
@@ -25,8 +24,7 @@ class Vbox7IE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         redirect_page, urlh = self._download_webpage_handle(url, video_id)
         new_location = self._search_regex(r'window\.location = \'(.*)\';',
index 94647d1c8c88a18cfb6abcba2ec5ed8e71e9c4b6..815f5846804a3cebcf716d8d20b6373e44fd14fd 100644 (file)
@@ -4,10 +4,12 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urlparse,
-    get_element_by_id,
+)
+from ..utils import (
     clean_html,
+    get_element_by_id,
 )
 
 
@@ -26,8 +28,7 @@ class VeeHDIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         # VeeHD seems to send garbage on the first request.
         # See https://github.com/rg3/youtube-dl/issues/2102
index a7953a7e7c5d33b154435cd7b4afa354994f4bf5..01e258e32218c227c5de3caf60588baab56e9045 100644 (file)
@@ -4,8 +4,10 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     int_or_none,
     ExtractorError,
 )
index c912c3cbe7ae42b221816b50ec6a97139cb13d55..43f6b029da8ff5df7fe808c11a85f8a8120f8ca5 100644 (file)
@@ -4,8 +4,10 @@ import re
 import xml.etree.ElementTree
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     ExtractorError,
 )
 
index ac6c255376442d132948eb5f54e0517bca5a66f4..0ffc7ff7dc9185a3a3ec5c0fd14d302872662dda 100644 (file)
@@ -1,10 +1,8 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from .internetvideoarchive import InternetVideoArchiveIE
-from ..utils import compat_urlparse
 
 
 class VideoDetectiveIE(InfoExtractor):
@@ -17,13 +15,12 @@ class VideoDetectiveIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'KICK-ASS 2',
             'description': 'md5:65ba37ad619165afac7d432eaded6013',
-            'duration': 135,
+            'duration': 138,
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         og_video = self._og_search_video_url(webpage)
         query = compat_urlparse.urlparse(og_video).query
index 29c4e0101ec21eb59c22de9739a516b9f96c0e0f..7a78f0d264a47fc85c53cd815242e629ee38ed30 100644 (file)
@@ -1,11 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
+)
+from ..utils import (
     remove_start,
 )
 
@@ -27,9 +27,7 @@ class VideoMegaIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
         webpage = self._download_webpage(url, video_id)
 
index ca6b0d5b3369c53b7e06715ea5a56fd338d71e41..542e9198ac0e5a9470ad5e16717bf9ca006050af 100644 (file)
@@ -5,14 +5,17 @@ import re
 import json
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+    compat_urllib_parse,
+    compat_urllib_request,
+)
 from ..utils import (
     ExtractorError,
-    compat_urllib_request,
-    compat_urllib_parse,
-    compat_str,
+    orderedSet,
     unescapeHTML,
     unified_strdate,
-    orderedSet)
+)
 
 
 class VKIE(InfoExtractor):
index affef650726d716b7e80aaab5c66dab3bc3ddc28..1c0966a793511a2ec3a9d147bd75ff22e8fb7209 100644 (file)
@@ -2,8 +2,9 @@
 from __future__ import unicode_literals
 
 import re
+
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
@@ -24,8 +25,7 @@ class VodlockerIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         fields = dict(re.findall(r'''(?x)<input\s+
index 1b2f731e932a63fbc7722251c0b4e57f0963c34c..405cb9db49f41a144a4c842d8f99aeb1c2023da9 100644 (file)
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+)
 from ..utils import (
     int_or_none,
-    compat_str,
     ExtractorError,
 )
 
index ec3c010ad7e151bfc304315cdc5fd32bc21e8f43..c3fde53f5ef06a56b54e94b20b72a7e98c1992a5 100644 (file)
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
+)
+from ..utils import (
     ExtractorError,
     parse_duration,
     qualities,
@@ -25,10 +27,9 @@ class VuClipIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         ad_m = re.search(
             r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
         if ad_m:
index 93a6e64542c71be1f05dac1e351d5c57ecd28ff7..8e25ecf280769166a49d18cfd2508bd6d90caa74 100644 (file)
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_parse_qs,
     compat_urlparse,
+)
+from ..utils import (
     determine_ext,
     unified_strdate,
 )
@@ -141,7 +143,6 @@ class WDRMobileIE(InfoExtractor):
             'title': mobj.group('title'),
             'age_limit': int(mobj.group('age_limit')),
             'url': url,
-            'ext': determine_ext(url),
             'user_agent': 'mobile',
         }
 
index 748443f811f184d4276d4628cd13ed1e2bf92d9c..13a079151c9c879561e3e538c49f3122f85b349b 100644 (file)
@@ -1,9 +1,8 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import ExtractorError, compat_urllib_request
+from ..compat import compat_urllib_request
+from ..utils import ExtractorError
 
 
 class WistiaIE(InfoExtractor):
@@ -22,8 +21,7 @@ class WistiaIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         request = compat_urllib_request.Request(self._API_URL.format(video_id))
         request.add_header('Referer', url)  # Some videos require this.
index 1b4e883652667f2c2109d34014154c71fd443196..80c48c37d32c0849e689d626811ee34c5b414ee0 100644 (file)
@@ -1,9 +1,7 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
@@ -23,10 +21,9 @@ class XBefIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         title = self._html_search_regex(
             r'<h1[^>]*>(.*?)</h1>', webpage, 'title')
 
index 53ed7ef5a6ea95826d0324bac53a71bea4913fa4..79ed6c744242bf132afd033ae35949cc1e2263b5 100644 (file)
@@ -1,10 +1,8 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
 )
 
@@ -23,10 +21,7 @@ class XNXXIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        # Get webpage content
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_url = self._search_regex(r'flv_url=(.*?)&amp;',
index 38448e7c0fbfe3641cc364a2707a97910ab16cf8..f9d98b83fb47aef2f927c21c102ced7943c11e53 100644 (file)
@@ -4,15 +4,17 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_request,
+)
+from ..utils import (
     parse_duration,
     str_to_int,
 )
 
 
 class XTubeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<id>[^/?&]+))'
     _TEST = {
         'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
         'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
@@ -29,7 +31,7 @@ class XTubeIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')
         url = 'http://www.' + mobj.group('url')
 
         req = compat_urllib_request.Request(url)
index 7e00448246beb9ab9b7c25f33b05e6f4f1bb8283..2a45dc574263f7e651020e591fcc40bdf987367d 100644 (file)
@@ -3,15 +3,17 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse,
-    ExtractorError,
+)
+from ..utils import (
     clean_html,
+    ExtractorError,
 )
 
 
 class XVideosIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
+    _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P<id>[0-9]+)(?:.*)'
     _TEST = {
         'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
         'md5': '4b46ae6ea5e6e9086e714d883313c0c9',
@@ -24,37 +26,25 @@ class XVideosIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        self.report_extraction(video_id)
-
         mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
         if mobj:
             raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
 
-        # Extract video URL
         video_url = compat_urllib_parse.unquote(
             self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
-
-        # Extract title
         video_title = self._html_search_regex(
             r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
-
-        # Extract video thumbnail
         video_thumbnail = self._search_regex(
             r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
 
         return {
             'id': video_id,
             'url': video_url,
-            'uploader': None,
-            'upload_date': None,
             'title': video_title,
             'ext': 'flv',
             'thumbnail': video_thumbnail,
-            'description': None,
             'age_limit': 18,
         }
index 0fdb122436d9b7e158a706cd1048630e252d5666..031226f2764032b9f1af16f5c9d74188a89cdc05 100644 (file)
@@ -6,11 +6,13 @@ import json
 import re
 
 from .common import InfoExtractor, SearchInfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
     compat_urllib_parse,
     compat_urlparse,
+)
+from ..utils import (
     clean_html,
+    ExtractorError,
     int_or_none,
 )
 
index 7b621a9e32b3cc4521a2988f00e32252cb17bd65..894678a23dac9d1b03e07f0cd9b2eecc7e690e18 100644 (file)
@@ -5,7 +5,7 @@ import re
 import json
 
 from .common import InfoExtractor
-from ..utils import compat_urllib_parse
+from ..compat import compat_urllib_parse
 
 
 class YnetIE(InfoExtractor):
index d9c06a2ee6d934391560b6ca7db4ebecf40ff1d0..107c9ac36e4f4f48bd768567e4399af15fd07743 100644 (file)
@@ -6,10 +6,11 @@ import re
 import sys
 
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
-
+)
+from ..utils import (
     ExtractorError,
     unescapeHTML,
     unified_strdate,
index 2bd264b306f8dc8f53bf6e0b9dec97ecd6e85cb4..e60505ace8b8451666f2aeebea3277bc58cb6297 100644 (file)
@@ -4,8 +4,8 @@ import collections
 import io
 import zlib
 
+from .compat import compat_str
 from .utils import (
-    compat_str,
     ExtractorError,
     struct_unpack,
 )