Merge pull request #8898 from dstftw/fragment-retries
author Sergey M <dstftw@gmail.com>
Wed, 23 Mar 2016 15:12:32 +0000 (20:12 +0500)
committer Sergey M <dstftw@gmail.com>
Wed, 23 Mar 2016 15:12:32 +0000 (20:12 +0500)
Add --fragment-retries option (Fixes #8466)

155 files changed:
AUTHORS
Makefile
README.md
test/test_http.py
tox.ini
youtube_dl/YoutubeDL.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/abc.py
youtube_dl/extractor/addanime.py
youtube_dl/extractor/aftonbladet.py
youtube_dl/extractor/aljazeera.py
youtube_dl/extractor/animeondemand.py
youtube_dl/extractor/aol.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/azubu.py
youtube_dl/extractor/baidu.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/behindkink.py
youtube_dl/extractor/bilibili.py
youtube_dl/extractor/biobiochiletv.py [new file with mode: 0644]
youtube_dl/extractor/bokecc.py
youtube_dl/extractor/bpb.py
youtube_dl/extractor/breakcom.py
youtube_dl/extractor/camdemy.py
youtube_dl/extractor/cbsnews.py
youtube_dl/extractor/cbssports.py
youtube_dl/extractor/cda.py [new file with mode: 0755]
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/cliphunter.py
youtube_dl/extractor/clipsyndicate.py
youtube_dl/extractor/clubic.py
youtube_dl/extractor/comcarcoff.py
youtube_dl/extractor/condenast.py
youtube_dl/extractor/cspan.py
youtube_dl/extractor/ctsnews.py
youtube_dl/extractor/dctp.py
youtube_dl/extractor/defense.py
youtube_dl/extractor/discovery.py
youtube_dl/extractor/douyutv.py
youtube_dl/extractor/dplay.py
youtube_dl/extractor/dreisat.py
youtube_dl/extractor/dvtv.py
youtube_dl/extractor/echomsk.py
youtube_dl/extractor/exfm.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/firstpost.py
youtube_dl/extractor/firsttv.py
youtube_dl/extractor/fktv.py
youtube_dl/extractor/footyroom.py
youtube_dl/extractor/foxgay.py
youtube_dl/extractor/franceinter.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/freevideo.py
youtube_dl/extractor/gamekings.py
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/gamestar.py
youtube_dl/extractor/gametrailers.py
youtube_dl/extractor/hotnewhiphop.py
youtube_dl/extractor/hypem.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/iqiyi.py
youtube_dl/extractor/jadorecettepub.py
youtube_dl/extractor/jeuxvideo.py
youtube_dl/extractor/karaoketv.py
youtube_dl/extractor/karrierevideos.py
youtube_dl/extractor/kontrtube.py
youtube_dl/extractor/ku6.py
youtube_dl/extractor/kusi.py
youtube_dl/extractor/kuwo.py
youtube_dl/extractor/laola1tv.py
youtube_dl/extractor/leeco.py
youtube_dl/extractor/lifenews.py
youtube_dl/extractor/limelight.py
youtube_dl/extractor/m6.py
youtube_dl/extractor/mailru.py
youtube_dl/extractor/metacafe.py
youtube_dl/extractor/mit.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/mooshare.py
youtube_dl/extractor/motherless.py
youtube_dl/extractor/motorsport.py
youtube_dl/extractor/myspass.py
youtube_dl/extractor/myvideo.py
youtube_dl/extractor/myvidster.py
youtube_dl/extractor/nationalgeographic.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/nextmedia.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/normalboots.py
youtube_dl/extractor/nova.py
youtube_dl/extractor/npr.py
youtube_dl/extractor/ntvru.py
youtube_dl/extractor/once.py
youtube_dl/extractor/openload.py [new file with mode: 0644]
youtube_dl/extractor/orf.py
youtube_dl/extractor/philharmoniedeparis.py
youtube_dl/extractor/photobucket.py
youtube_dl/extractor/porn91.py
youtube_dl/extractor/pornhd.py
youtube_dl/extractor/pornovoisines.py
youtube_dl/extractor/pyvideo.py
youtube_dl/extractor/qqmusic.py
youtube_dl/extractor/rai.py
youtube_dl/extractor/redtube.py
youtube_dl/extractor/ringtv.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/ruhd.py
youtube_dl/extractor/rutube.py
youtube_dl/extractor/rutv.py
youtube_dl/extractor/safari.py
youtube_dl/extractor/screenjunkies.py
youtube_dl/extractor/senateisvp.py
youtube_dl/extractor/shared.py
youtube_dl/extractor/sport5.py
youtube_dl/extractor/ssa.py
youtube_dl/extractor/sztvhu.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/tele13.py
youtube_dl/extractor/tf1.py
youtube_dl/extractor/thescene.py [new file with mode: 0644]
youtube_dl/extractor/thvideo.py
youtube_dl/extractor/tinypic.py
youtube_dl/extractor/tlc.py
youtube_dl/extractor/toypics.py
youtube_dl/extractor/traileraddict.py
youtube_dl/extractor/trollvids.py
youtube_dl/extractor/tudou.py
youtube_dl/extractor/tumblr.py
youtube_dl/extractor/tunein.py
youtube_dl/extractor/tv2.py
youtube_dl/extractor/tvc.py
youtube_dl/extractor/tvplay.py
youtube_dl/extractor/ubu.py
youtube_dl/extractor/unistra.py
youtube_dl/extractor/vbox7.py
youtube_dl/extractor/veoh.py
youtube_dl/extractor/vesti.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/videott.py
youtube_dl/extractor/viidea.py
youtube_dl/extractor/vlive.py
youtube_dl/extractor/vube.py
youtube_dl/extractor/vuclip.py
youtube_dl/extractor/walla.py
youtube_dl/extractor/wat.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/weiqitv.py
youtube_dl/extractor/wimp.py
youtube_dl/extractor/xbef.py
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/yam.py
youtube_dl/extractor/ynet.py
youtube_dl/options.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py

diff --git a/AUTHORS b/AUTHORS
index aa48cd5a6015aa965a23b4203349e3bc0a6f690d..51dfc8dddeb2a3f118f2c3fa1e0886cafb14ae60 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -163,3 +163,6 @@ Patrick Griffis
 Aidan Rowe
 mutantmonkey
 Ben Congdon
+Kacper Michajłow
+José Joaquín Atria
+Viťas Strádal
diff --git a/Makefile b/Makefile
index e98806791327feaa67cd780e5395ce0cc960bbd8..6689ec06f4b3e903a0d369bf2900374a71b7d939 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -12,15 +12,7 @@ SHAREDIR ?= $(PREFIX)/share
 PYTHON ?= /usr/bin/env python
 
 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
-ifeq ($(PREFIX),/usr)
-       SYSCONFDIR=/etc
-else
-       ifeq ($(PREFIX),/usr/local)
-               SYSCONFDIR=/etc
-       else
-               SYSCONFDIR=$(PREFIX)/etc
-       endif
-endif
+SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi
 
 install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
        install -d $(DESTDIR)$(BINDIR)
diff --git a/README.md b/README.md
index 68db546ef33b951f30d3639fccddeeb960ae5c91..fcc12d2b3fc9c5219384d7a6bdcbe9b22238f628 100644 (file)
--- a/README.md
+++ b/README.md
@@ -831,7 +831,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file
 If you want to create a build of youtube-dl yourself, you'll need
 
 * python
-* make
+* make (both GNU make and BSD make are supported)
 * pandoc
 * zip
 * nosetests
index fc59b1aed6ddc2db10598a1a4b954a128e3d3133..15e0ad369d57966bef222bf35c422ad9bdb4e755 100644 (file)
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8
 from __future__ import unicode_literals
 
 # Allow direct execution
@@ -120,5 +121,14 @@ class TestProxy(unittest.TestCase):
         response = ydl.urlopen(req).read().decode('utf-8')
         self.assertEqual(response, 'cn: {0}'.format(url))
 
+    def test_proxy_with_idn(self):
+        ydl = YoutubeDL({
+            'proxy': 'localhost:{0}'.format(self.port),
+        })
+        url = 'http://中文.tw/'
+        response = ydl.urlopen(url).read().decode('utf-8')
+        # b'xn--fiq228c' is '中文'.encode('idna')
+        self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/tox.ini b/tox.ini
index 48504329fee28fa47f98d8b1c7f4d39911ccac9f..2d71340050bf8f8a971acb3931621f62ded02176 100644 (file)
--- a/tox.ini
+++ b/tox.ini
@@ -8,6 +8,6 @@ deps =
 passenv = HOME
 defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
     --exclude test_subtitles.py --exclude test_write_annotations.py
-    --exclude test_youtube_lists.py
+    --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
 commands = nosetests --verbose {posargs:{[testenv]defaultargs}}  # --with-coverage --cover-package=youtube_dl --cover-html
                                                # test.test_download:TestDownload.test_NowVideo
index 93b6ca54d615206c029e75aa5dd96a61a650dabd..29d7a31062da5cbe033b776213c5c9c4431f3dc7 100755 (executable)
@@ -1836,7 +1836,7 @@ class YoutubeDL(object):
         if fdict.get('language'):
             if res:
                 res += ' '
-            res += '[%s]' % fdict['language']
+            res += '[%s] ' % fdict['language']
         if fdict.get('format_note') is not None:
             res += fdict['format_note'] + ' '
         if fdict.get('tbr') is not None:
index 529051a93da610b1b427c7b8b4fafdd291dcd138..8f7df4d121c718bdb3b6cb52b591209aed31f156 100644 (file)
@@ -72,6 +72,7 @@ from .bet import BetIE
 from .bigflix import BigflixIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
+from .biobiochiletv import BioBioChileTVIE
 from .bleacherreport import (
     BleacherReportIE,
     BleacherReportCMSIE,
@@ -108,6 +109,7 @@ from .cbsnews import (
 )
 from .cbssports import CBSSportsIE
 from .ccc import CCCIE
+from .cda import CDAIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chaturbate import ChaturbateIE
@@ -533,6 +535,7 @@ from .ooyala import (
     OoyalaIE,
     OoyalaExternalIE,
 )
+from .openload import OpenloadIE
 from .ora import OraTVIE
 from .orf import (
     ORFTVthekIE,
@@ -628,6 +631,7 @@ from .ruutu import RuutuIE
 from .sandia import SandiaIE
 from .safari import (
     SafariIE,
+    SafariApiIE,
     SafariCourseIE,
 )
 from .sapo import SapoIE
@@ -739,6 +743,7 @@ from .theplatform import (
     ThePlatformIE,
     ThePlatformFeedIE,
 )
+from .thescene import TheSceneIE
 from .thesixtyone import TheSixtyOneIE
 from .thestar import TheStarIE
 from .thisamericanlife import ThisAmericanLifeIE
index 6a29e587f007b28cfaf8bd2e714e276ab0b681a1..b584277be92b5a86fb9e0ac5d95870444d441174 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class ABCIE(InfoExtractor):
     IE_NAME = 'abc.net.au'
-    _VALID_URL = r'http://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
+    _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
index e3e6d21137994593d593fbc51313bf38032ce7f8..fb1cc02e11f38ba4961a66e418c1f66af403e853 100644 (file)
@@ -16,7 +16,7 @@ from ..utils import (
 
 
 class AddAnimeIE(InfoExtractor):
-    _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
+    _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
     _TESTS = [{
         'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
         'md5': '72954ea10bc979ab5e2eb288b21425a0',
index e0518cf261fbffc4dd23bc4a3800d04eae324139..d548592fe8acbbf2db432db3ed699b80b78e0aa0 100644 (file)
@@ -6,7 +6,7 @@ from ..utils import int_or_none
 
 
 class AftonbladetIE(InfoExtractor):
-    _VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
         'info_dict': {
index cddcaa489791eb9393d7832350935f2cab8b7c79..b081695d8400c0e24d36e84bd8445efa084ed8b3 100644 (file)
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class AlJazeeraIE(InfoExtractor):
-    _VALID_URL = r'http://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
 
     _TEST = {
         'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
index 0158407f6a89a66f8b6d1a5b560dc3794b5152e3..2cede55a7e497f16a49a03ad8476d96f1ae9b433 100644 (file)
@@ -3,10 +3,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_urlparse,
+    compat_str,
+)
 from ..utils import (
     determine_ext,
     encode_dict,
+    extract_attributes,
     ExtractorError,
     sanitized_Request,
     urlencode_postdata,
@@ -34,6 +38,10 @@ class AnimeOnDemandIE(InfoExtractor):
         # Episodes without titles
         'url': 'https://www.anime-on-demand.de/anime/162',
         'only_matching': True,
+    }, {
+        # ger/jap, Dub/OmU, account required
+        'url': 'https://www.anime-on-demand.de/anime/169',
+        'only_matching': True,
     }]
 
     def _login(self):
@@ -44,6 +52,10 @@ class AnimeOnDemandIE(InfoExtractor):
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')
 
+        if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
+            self.raise_geo_restricted(
+                '%s is only available in German-speaking countries of Europe' % self.IE_NAME)
+
         login_form = self._form_hidden_inputs('new_user', login_page)
 
         login_form.update({
@@ -126,33 +138,86 @@ class AnimeOnDemandIE(InfoExtractor):
 
             formats = []
 
-            playlist_url = self._search_regex(
-                r'data-playlist=(["\'])(?P<url>.+?)\1',
-                episode_html, 'data playlist', default=None, group='url')
-            if playlist_url:
-                request = sanitized_Request(
-                    compat_urlparse.urljoin(url, playlist_url),
-                    headers={
-                        'X-Requested-With': 'XMLHttpRequest',
-                        'X-CSRF-Token': csrf_token,
-                        'Referer': url,
-                        'Accept': 'application/json, text/javascript, */*; q=0.01',
-                    })
-
-                playlist = self._download_json(
-                    request, video_id, 'Downloading playlist JSON', fatal=False)
-                if playlist:
-                    playlist = playlist['playlist'][0]
-                    title = playlist['title']
+            for input_ in re.findall(
+                    r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
+                attributes = extract_attributes(input_)
+                playlist_urls = []
+                for playlist_key in ('data-playlist', 'data-otherplaylist'):
+                    playlist_url = attributes.get(playlist_key)
+                    if isinstance(playlist_url, compat_str) and re.match(
+                            r'/?[\da-zA-Z]+', playlist_url):
+                        playlist_urls.append(attributes[playlist_key])
+                if not playlist_urls:
+                    continue
+
+                lang = attributes.get('data-lang')
+                lang_note = attributes.get('value')
+
+                for playlist_url in playlist_urls:
+                    kind = self._search_regex(
+                        r'videomaterialurl/\d+/([^/]+)/',
+                        playlist_url, 'media kind', default=None)
+                    format_id_list = []
+                    if lang:
+                        format_id_list.append(lang)
+                    if kind:
+                        format_id_list.append(kind)
+                    if not format_id_list:
+                        format_id_list.append(compat_str(num))
+                    format_id = '-'.join(format_id_list)
+                    format_note = ', '.join(filter(None, (kind, lang_note)))
+                    request = sanitized_Request(
+                        compat_urlparse.urljoin(url, playlist_url),
+                        headers={
+                            'X-Requested-With': 'XMLHttpRequest',
+                            'X-CSRF-Token': csrf_token,
+                            'Referer': url,
+                            'Accept': 'application/json, text/javascript, */*; q=0.01',
+                        })
+                    playlist = self._download_json(
+                        request, video_id, 'Downloading %s playlist JSON' % format_id,
+                        fatal=False)
+                    if not playlist:
+                        continue
+                    start_video = playlist.get('startvideo', 0)
+                    playlist = playlist.get('playlist')
+                    if not playlist or not isinstance(playlist, list):
+                        continue
+                    playlist = playlist[start_video]
+                    title = playlist.get('title')
+                    if not title:
+                        continue
                     description = playlist.get('description')
                     for source in playlist.get('sources', []):
                         file_ = source.get('file')
-                        if file_ and determine_ext(file_) == 'm3u8':
-                            formats = self._extract_m3u8_formats(
+                        if not file_:
+                            continue
+                        ext = determine_ext(file_)
+                        format_id_list = [lang, kind]
+                        if ext == 'm3u8':
+                            format_id_list.append('hls')
+                        elif source.get('type') == 'video/dash' or ext == 'mpd':
+                            format_id_list.append('dash')
+                        format_id = '-'.join(filter(None, format_id_list))
+                        if ext == 'm3u8':
+                            file_formats = self._extract_m3u8_formats(
                                 file_, video_id, 'mp4',
-                                entry_protocol='m3u8_native', m3u8_id='hls')
+                                entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
+                        elif source.get('type') == 'video/dash' or ext == 'mpd':
+                            continue
+                            file_formats = self._extract_mpd_formats(
+                                file_, video_id, mpd_id=format_id, fatal=False)
+                        else:
+                            continue
+                        for f in file_formats:
+                            f.update({
+                                'language': lang,
+                                'format_note': format_note,
+                            })
+                        formats.extend(file_formats)
 
             if formats:
+                self._sort_formats(formats)
                 f = common_info.copy()
                 f.update({
                     'title': title,
@@ -161,16 +226,18 @@ class AnimeOnDemandIE(InfoExtractor):
                 })
                 entries.append(f)
 
-            m = re.search(
-                r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
-                episode_html)
-            if m:
-                f = common_info.copy()
-                f.update({
-                    'id': '%s-teaser' % f['id'],
-                    'title': m.group('title'),
-                    'url': compat_urlparse.urljoin(url, m.group('href')),
-                })
-                entries.append(f)
+            # Extract teaser only when full episode is not available
+            if not formats:
+                m = re.search(
+                    r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
+                    episode_html)
+                if m:
+                    f = common_info.copy()
+                    f.update({
+                        'id': '%s-teaser' % f['id'],
+                        'title': m.group('title'),
+                        'url': compat_urlparse.urljoin(url, m.group('href')),
+                    })
+                    entries.append(f)
 
         return self.playlist_result(entries, anime_id, anime_title, anime_description)
index b761b2cc4c5d3d4b70766ed56ff5c3529dd39e6b..95a99c6b0d567c52b477a1964d9c055d0b0a6b8a 100644 (file)
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 class AolIE(InfoExtractor):
     IE_NAME = 'on.aol.com'
-    _VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
+    _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
 
     _TESTS = [{
         'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@@ -25,7 +25,7 @@ class AolIE(InfoExtractor):
 
 class AolFeaturesIE(InfoExtractor):
     IE_NAME = 'features.aol.com'
-    _VALID_URL = r'http://features\.aol\.com/video/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://features\.aol\.com/video/(?P<id>[^/?#]+)'
 
     _TESTS = [{
         'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',
index 3e119e21b39ba2ab6bc504cf1d19a90008bfbd24..ae0f27dcbe059c0d469eaeca243ef59400ff68d6 100644 (file)
@@ -23,7 +23,7 @@ from ..utils import (
 
 
 class ArteTvIE(InfoExtractor):
-    _VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
+    _VALID_URL = r'https?://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
     IE_NAME = 'arte.tv'
 
     def _real_extract(self, url):
index 011edf128c2a688bc4ef56e487872ff1b15cee66..1805b7312a6e2d0743219b323307f56e0b0e5638 100644 (file)
@@ -98,7 +98,7 @@ class AzubuIE(InfoExtractor):
 
 
 class AzubuLiveIE(InfoExtractor):
-    _VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$'
+    _VALID_URL = r'https?://www.azubu.tv/(?P<id>[^/]+)$'
 
     _TEST = {
         'url': 'http://www.azubu.tv/MarsTVMDLen',
index 76b21e5962eae26e6bceef265c56e1f1a16ca922..234a661d34623b0b2da3028b20bcc23fc11e2991 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import unescapeHTML
 
 class BaiduVideoIE(InfoExtractor):
     IE_DESC = '百度视频'
-    _VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
+    _VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
     _TESTS = [{
         'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
         'info_dict': {
index e62b3860e99b106d08ef79cf593e180fe8c9496c..2dfcee98d5b5ac9e01c611878ad9b073436983df 100644 (file)
@@ -942,7 +942,7 @@ class BBCIE(BBCCoUkIE):
 
 
 class BBCCoUkArticleIE(InfoExtractor):
-    _VALID_URL = 'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
     IE_NAME = 'bbc.co.uk:article'
     IE_DESC = 'BBC articles'
 
index 1bdc25812b6afb4cf133007f2d12b89fd56b353f..9bca853b32979a4e2700f5d121c24a08fd875224 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import url_basename
 
 
 class BehindKinkIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
+    _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
     _TEST = {
         'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
         'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
index 59beb11bce71bfc6ef9b036ad123dc44e872d0be..8baff2041bb380d0204895cbbc6c64b16be94993 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class BiliBiliIE(InfoExtractor):
-    _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
+    _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
 
     _TESTS = [{
         'url': 'http://www.bilibili.tv/video/av1074402/',
diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py
new file mode 100644 (file)
index 0000000..1332281
--- /dev/null
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import remove_end
+
+
+class BioBioChileTVIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
+
+    _TESTS = [{
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
+        'md5': '26f51f03cf580265defefb4518faec09',
+        'info_dict': {
+            'id': 'sobre-camaras-y-camarillas-parlamentarias',
+            'ext': 'mp4',
+            'title': 'Sobre Cámaras y camarillas parlamentarias',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'Fernando Atria',
+        },
+    }, {
+        # different uploader layout
+        'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
+        'md5': 'edc2e6b58974c46d5b047dea3c539ff3',
+        'info_dict': {
+            'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades',
+            'ext': 'mp4',
+            'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'Piangella Obrador',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
+
+        file_url = self._search_regex(
+            r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
+            webpage, 'file url', group='url')
+
+        base_url = self._search_regex(
+            r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
+            'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
+            group='url')
+
+        formats = self._extract_m3u8_formats(
+            '%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+        f = {
+            'url': '%s%s' % (base_url, file_url),
+            'format_id': 'http',
+            'protocol': 'http',
+            'preference': 1,
+        }
+        if formats:
+            f_copy = formats[-1].copy()
+            f_copy.update(f)
+            f = f_copy
+        formats.append(f)
+        self._sort_formats(formats)
+
+        thumbnail = self._og_search_thumbnail(webpage)
+        uploader = self._html_search_regex(
+            r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
+            webpage, 'uploader', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'formats': formats,
+        }
index 122a1cbb6a5a85ea457b7652523f9dfbc3406be3..86a7f4d7d6261966101508023450e08f5d6c30d7 100644 (file)
@@ -33,7 +33,7 @@ class BokeCCBaseIE(InfoExtractor):
 
 class BokeCCIE(BokeCCBaseIE):
     _IE_DESC = 'CC视频'
-    _VALID_URL = r'http://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
+    _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
 
     _TESTS = [{
         'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',
index c28e72927fefc90c320d1b11b31db5b3d7e754d1..6ad45a1e6a30bac2450743de3f0d12a2c9f2b89d 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class BpbIE(InfoExtractor):
     IE_DESC = 'Bundeszentrale für politische Bildung'
-    _VALID_URL = r'http://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
+    _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
 
     _TEST = {
         'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
index aa08051b168cee479a416647645acdd6543ac71d..725859b4d2d554df91ff4793a2b3d245f02c8996 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class BreakIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
         'info_dict': {
index 897f3a104ce2d31aeac99e98197557ef502faf18..dd4d96cecd82764aa8ae77203d5a92e43ffe3acc 100644 (file)
@@ -16,7 +16,7 @@ from ..utils import (
 
 
 class CamdemyIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
     _TESTS = [{
         # single file
         'url': 'http://www.camdemy.com/media/5181/',
@@ -104,7 +104,7 @@ class CamdemyIE(InfoExtractor):
 
 
 class CamdemyFolderIE(InfoExtractor):
-    _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)'
+    _VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)'
     _TESTS = [{
         # links with trailing slash
         'url': 'http://www.camdemy.com/folder/450',
index 8ddcc5097e3bb7b83392e056080846daaab30924..f23bac9a1ff279ed28b43fa52f080bc2c4d651d7 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 class CBSNewsIE(ThePlatformIE):
     IE_DESC = 'CBS News'
-    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
 
     _TESTS = [
         {
@@ -96,7 +96,7 @@ class CBSNewsIE(ThePlatformIE):
 
 class CBSNewsLiveVideoIE(InfoExtractor):
     IE_DESC = 'CBS News Live Videos'
-    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
 
     _TEST = {
         'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
index ae47e74ccf583ac9d821dd588f07f33ff57673db..549ae32f36c8ebd258896d4189ba90ae501c40d0 100644 (file)
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 
 class CBSSportsIE(InfoExtractor):
-    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
 
     _TEST = {
         'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py
new file mode 100755 (executable)
index 0000000..498d2c0
--- /dev/null
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    decode_packed_codes,
+    ExtractorError,
+    parse_duration
+)
+
+
+class CDAIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
+    _TESTS = [{
+        'url': 'http://www.cda.pl/video/5749950c',
+        'md5': '6f844bf51b15f31fae165365707ae970',
+        'info_dict': {
+            'id': '5749950c',
+            'ext': 'mp4',
+            'height': 720,
+            'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
+            'duration': 39
+        }
+    }, {
+        'url': 'http://www.cda.pl/video/57413289',
+        'md5': 'a88828770a8310fc00be6c95faf7f4d5',
+        'info_dict': {
+            'id': '57413289',
+            'ext': 'mp4',
+            'title': 'Lądowanie na lotnisku na Maderze',
+            'duration': 137
+        }
+    }, {
+        'url': 'http://ebd.cda.pl/0x0/5749950c',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
+
+        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
+            raise ExtractorError('This video is only available for premium users.', expected=True)
+
+        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+
+        formats = []
+
+        info_dict = {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'duration': None,
+        }
+
+        def extract_format(page, version):
+            unpacked = decode_packed_codes(page)
+            format_url = self._search_regex(
+                r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False)
+            if not format_url:
+                return
+            f = {
+                'url': format_url,
+            }
+            m = re.search(
+                r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
+                page)
+            if m:
+                f.update({
+                    'format_id': m.group('format_id'),
+                    'height': int(m.group('height')),
+                })
+            info_dict['formats'].append(f)
+            if not info_dict['duration']:
+                info_dict['duration'] = parse_duration(self._search_regex(
+                    r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False))
+
+        extract_format(webpage, 'default')
+
+        for href, resolution in re.findall(
+                r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
+                webpage):
+            webpage = self._download_webpage(
+                href, video_id, 'Downloading %s version information' % resolution, fatal=False)
+            if not webpage:
+                # Manually report warning because empty page is returned when
+                # invalid version is requested.
+                self.report_warning('Unable to download %s version information' % resolution)
+                continue
+            extract_format(webpage, resolution)
+
+        self._sort_formats(formats)
+
+        return info_dict
index b27b4e6708675027ccc0337f5abc44b4c480816b..b355111cbef2306805b4a3c445b87a275b4697ab 100644 (file)
@@ -129,7 +129,8 @@ class CeskaTelevizeIE(InfoExtractor):
             formats = []
             for format_id, stream_url in item['streamUrls'].items():
                 formats.extend(self._extract_m3u8_formats(
-                    stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native'))
+                    stream_url, playlist_id, 'mp4',
+                    entry_protocol='m3u8_native', fatal=False))
             self._sort_formats(formats)
 
             item_id = item.get('id') or item['assetId']
index 2996b6b09e81fcd0e04038d1744f2fdf3d54e694..19f8b397e44a679ea936ad638048ccb488dc4b93 100644 (file)
@@ -19,7 +19,7 @@ def _decode(s):
 class CliphunterIE(InfoExtractor):
     IE_NAME = 'cliphunter'
 
-    _VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/
+    _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
         (?P<id>[0-9]+)/
         (?P<seo>.+?)(?:$|[#\?])
     '''
index 8306d6fb7d0d4414cff36f7b381ca9c877820f58..0b6ad895fd7841e70b7dc0dd136052ff0459dd3c 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import (
 
 
 class ClipsyndicateIE(InfoExtractor):
-    _VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
index 1dfa7c12e19dd3151b9fdbda9c76ad4f58fa3192..2fba93543474cd7ebd53848aca62848c32bf7164 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class ClubicIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
 
     _TESTS = [{
         'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
index 7dff684929721699bb3df90a5a4ce52f0552848b..e697d14107534e57845ea661864826ec4843735d 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class ComCarCoffIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
+    _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
     _TESTS = [{
         'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
         'info_dict': {
index 6f92ae2ed0cd1383f66d21b3a9274be151ca32b2..054978ff23b367484c32c2142906c27c28379b38 100644 (file)
@@ -45,7 +45,7 @@ class CondeNastIE(InfoExtractor):
         'wmagazine': 'W Magazine',
     }
 
-    _VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
+    _VALID_URL = r'https?://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
     IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
 
     EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys())
index b8b9d058ddce866fa497597863c80d1de0a7f5c5..84b36f44cfac7bd45a8a7d28adb6767093a7d19b 100644 (file)
@@ -15,7 +15,7 @@ from .senateisvp import SenateISVPIE
 
 
 class CSpanIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
+    _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
     IE_DESC = 'C-SPAN'
     _TESTS = [{
         'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
index 45049bf371370da6e4b64952441e76d86814fd6a..1622fc844a1b8d4794fc12694f03f37c00076f15 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import parse_iso8601, ExtractorError
 class CtsNewsIE(InfoExtractor):
     IE_DESC = '華視新聞'
     # https connection failed (Connection reset)
-    _VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
     _TESTS = [{
         'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
         'md5': 'a9875cb790252b08431186d741beaabe',
index aa2c09eb686f9da5a7bedfdfe57566e9d29a0700..9099f5046a14ad7c769a6da50d813076f8b9231e 100644 (file)
@@ -6,7 +6,7 @@ from ..compat import compat_str
 
 
 class DctpTvIE(InfoExtractor):
-    _VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
+    _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
     _TEST = {
         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
         'info_dict': {
index 98e3aedfd08ada1300cbf3114a41022949062402..9fe144e1431941051f3f2b7134fd9eb888522e96 100644 (file)
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 class DefenseGouvFrIE(InfoExtractor):
     IE_NAME = 'defense.gouv.fr'
-    _VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
+    _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
 
     _TEST = {
         'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
index ce680a9f3192832a7fb9e35956382f85bc60d043..fdce1429a62acb579f1e3acbd5e883a8444dd6f9 100644 (file)
@@ -9,7 +9,7 @@ from ..compat import compat_str
 
 
 class DiscoveryIE(InfoExtractor):
-    _VALID_URL = r'''(?x)http://(?:www\.)?(?:
+    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
             discovery|
             investigationdiscovery|
             discoverylife|
index bdc768c783b9b3213badc5cf4b354f6159142f9f..bcb670945d27b31802bf27390feb4f6cbd53cd6f 100644 (file)
@@ -10,7 +10,7 @@ from ..compat import (compat_str, compat_basestring)
 
 class DouyuTVIE(InfoExtractor):
     IE_DESC = '斗鱼'
-    _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
     _TESTS = [{
         'url': 'http://www.douyutv.com/iseven',
         'info_dict': {
index a638c827c7e01ed8acee28a091d3cdcff510ada0..1e7dcada614dbe4d0cd65efa2e8ff41c790c007b 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import int_or_none
 
 
 class DPlayIE(InfoExtractor):
-    _VALID_URL = r'http://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
 
     _TESTS = [{
         'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',
index 028144f20b3458e61ed214703d5f6fd17ab79871..0040e70d4929828ebf2dc7dc74199ed639dcfebd 100644 (file)
@@ -7,7 +7,7 @@ from .zdf import ZDFIE
 
 class DreiSatIE(ZDFIE):
     IE_NAME = '3sat'
-    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+    _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
     _TESTS = [
         {
             'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
index c1a4bc757f78770179f332d651227f47cceb8a99..974c69dbc75fcb29bd57e30432fa466182b68743 100644 (file)
@@ -15,7 +15,7 @@ class DVTVIE(InfoExtractor):
     IE_NAME = 'dvtv'
     IE_DESC = 'http://video.aktualne.cz/'
 
-    _VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
+    _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
 
     _TESTS = [{
         'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
index d2d94049d368e74413d93ca40628e3d5174a7675..6b7cc652fe43c60cb8d8326f1cf6bd0c51fbd59f 100644 (file)
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 
 
 class EchoMskIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.echo.msk.ru/sounds/1464134.html',
         'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
index 0c0fe6d652b11c2d30746f7d4da232ee4a1d65e3..09ed4f2b5644c5c8d55ea944d98e1684acacc125 100644 (file)
@@ -8,7 +8,7 @@ from .common import InfoExtractor
 class ExfmIE(InfoExtractor):
     IE_NAME = 'exfm'
     IE_DESC = 'ex.fm'
-    _VALID_URL = r'http://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
     _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
     _TESTS = [
         {
index 9580f5c0c5d1f4eb9bd54eb76126744322ae85e9..508684d2eec8786c83ce3dce8cb4e8f85fa56673 100644 (file)
@@ -17,7 +17,7 @@ from ..utils import (
 
 
 class FC2IE(InfoExtractor):
-    _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
+    _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
     IE_NAME = 'fc2'
     _NETRC_MACHINE = 'fc2'
     _TESTS = [{
index 298227d5793770c82d8868256d655fa7ea3dc31c..e8936cb2468f78bd2c1b59008a13e9411204380e 100644 (file)
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class FirstpostIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
 
     _TEST = {
         'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
index 510d4b108944d1f220c45ddc2fbe85cdad6114ca..98b165143fe8b3f3e970ad602856b4266c59701c 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import int_or_none
 class FirstTVIE(InfoExtractor):
     IE_NAME = '1tv'
     IE_DESC = 'Первый канал'
-    _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
 
     _TESTS = [{
         'url': 'http://www.1tv.ru/videoarchive/73390',
index 5f6e65daed2d5dc2c18c09a97450ae2a9c88e2df..a3a2915998dc1cc2fca8f5ccdf6cec6cac0d528b 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import (
 
 class FKTVIE(InfoExtractor):
     IE_NAME = 'fernsehkritik.tv'
-    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
+    _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
 
     _TEST = {
         'url': 'http://fernsehkritik.tv/folge-1',
index 370fd006fe015e4ab1a017d10fd8784b33ba034a..d2503ae2eff3d2e46497bbcba356af11db665452 100644 (file)
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class FootyRoomIE(InfoExtractor):
-    _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
         'info_dict': {
index 08b8ea36235993f78ce7a4ba05ac05f255ee4ea7..70c1a815d3121bf048da9510a00abf10dc516126 100644 (file)
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class FoxgayIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
+    _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
     _TEST = {
         'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
         'md5': '80d72beab5d04e1655a56ad37afe6841',
index 0388ba00c2a7ab5bfa4cf622c359c697615daec4..2369f868da4a39b1cf84c7cee6a5830859484082 100644 (file)
@@ -6,7 +6,7 @@ from ..utils import int_or_none
 
 
 class FranceInterIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
         'md5': '4764932e466e6f6c79c317d2e74f6884',
index 3f4ac30939cd38a7294f0d8eb815cb58c6f2fbcd..ad94e31f346cc97cd71ad1be9f6983a16b6df209 100644 (file)
@@ -60,28 +60,31 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                     video_id, 'Downloading f4m manifest token', fatal=False)
                 if f4m_url:
                     formats.extend(self._extract_f4m_formats(
-                        f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id))
+                        f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
+                        video_id, f4m_id=format_id, fatal=False))
             elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id=format_id, fatal=False))
             elif video_url.startswith('rtmp'):
                 formats.append({
                     'url': video_url,
                     'format_id': 'rtmp-%s' % format_id,
                     'ext': 'flv',
-                    'preference': 1,
                 })
             else:
-                formats.append({
-                    'url': video_url,
-                    'format_id': format_id,
-                    'preference': -1,
-                })
+                if self._is_valid_url(video_url, video_id, format_id):
+                    formats.append({
+                        'url': video_url,
+                        'format_id': format_id,
+                    })
         self._sort_formats(formats)
 
         title = info['titre']
         subtitle = info.get('sous_titre')
         if subtitle:
             title += ' - %s' % subtitle
+        title = title.strip()
 
         subtitles = {}
         subtitles_list = [{
@@ -125,13 +128,13 @@ class PluzzIE(FranceTVBaseInfoExtractor):
 
 class FranceTvInfoIE(FranceTVBaseInfoExtractor):
     IE_NAME = 'francetvinfo.fr'
-    _VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
+    _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
 
     _TESTS = [{
         'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
         'info_dict': {
             'id': '84981923',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Soir 3',
             'upload_date': '20130826',
             'timestamp': 1377548400,
@@ -139,6 +142,10 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
                 'fr': 'mincount:2',
             },
         },
+        'params': {
+            # m3u8 downloads
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
         'info_dict': {
@@ -155,11 +162,32 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
         'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
         'md5': 'f485bda6e185e7d15dbc69b72bae993e',
         'info_dict': {
-            'id': '556e03339473995ee145930c',
+            'id': 'NI_173343',
             'ext': 'mp4',
             'title': 'Les entreprises familiales : le secret de la réussite',
             'thumbnail': 're:^https?://.*\.jpe?g$',
-        }
+            'timestamp': 1433273139,
+            'upload_date': '20150602',
+        },
+        'params': {
+            # m3u8 downloads
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
+        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
+        'info_dict': {
+            'id': 'NI_657393',
+            'ext': 'mp4',
+            'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
+            'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
+            'thumbnail': 're:^https?://.*\.jpe?g$',
+            'timestamp': 1458300695,
+            'upload_date': '20160318',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
@@ -172,7 +200,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
             return self.url_result(dmcloud_url, 'DailymotionCloud')
 
         video_id, catalogue = self._search_regex(
-            r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
+            (r'id-video=([^@]+@[^"]+)',
+             r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
+            webpage, 'video id').split('@')
         return self._extract_video(video_id, catalogue)
 
 
index c7bec027bbe37d8c13f645ae918655caeb5a53cd..cd8423a6faff4431c310e1258396bbb58dc92a14 100644 (file)
@@ -5,7 +5,7 @@ from ..utils import ExtractorError
 
 
 class FreeVideoIE(InfoExtractor):
-    _VALID_URL = r'^http://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
+    _VALID_URL = r'^https?://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
 
     _TEST = {
         'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',
index f6b9046f943c702262897002b07c418e3e666b3d..cbcddcb7cd116a05a8b294c06988586aec955c51 100644 (file)
@@ -10,7 +10,7 @@ from .youtube import YoutubeIE
 
 
 class GamekingsIE(InfoExtractor):
-    _VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
     _TESTS = [{
         # YouTube embed video
         'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
index b3f1bafcc37ee98f1c5b89a644909f3ee0a32049..4ffdd75157486957810f718cb1019cdc5dd80f4f 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class GameSpotIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
+    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
     _TESTS = [{
         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
         'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
index 590ccf5266d61e67772a1276a83bfdb6919abc63..69058a5835f2bac0d1e56ce0917909df0fb9a92b 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class GameStarIE(InfoExtractor):
-    _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
     _TEST = {
         'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
         'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
index c3f031d9cd4341184cc3b70eea77c1f360a1a3c6..1e7948ab816f5b08ee6dbeb39de1d5f50fbdf314 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class GametrailersIE(InfoExtractor):
-    _VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
+    _VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
 
     _TEST = {
         'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',
index 31e219945412398909053ff464245763a671ae19..efc3e8429956b38016e3b9a6c7d84cbaed367ff8 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class HotNewHipHopIE(InfoExtractor):
-    _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
+    _VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
     _TEST = {
         'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
         'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
index b3706fe6d6cd8dfabb8d8b614baf1a6e12ea75d9..e0ab318022ba4291771d2e9d146566e4f0437daf 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class HypemIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
+    _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
     _TEST = {
         'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
         'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
index b61b2dc4e0e36867c55dcd62068466f1080a22ad..8bed8ccd06e2eeb64eba69f3407c9271c0643731 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 class ImdbIE(InfoExtractor):
     IE_NAME = 'imdb'
     IE_DESC = 'Internet Movie Database trailers'
-    _VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://www.imdb.com/video/imdb/vi2524815897',
@@ -70,7 +70,7 @@ class ImdbIE(InfoExtractor):
 class ImdbListIE(InfoExtractor):
     IE_NAME = 'imdb:list'
     IE_DESC = 'Internet Movie Database lists'
-    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+    _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
     _TEST = {
         'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
         'info_dict': {
index e7c0cb3f66ab542e79f86238d2db991047d6d453..1a4c647136f75a2c1460c7a9c8075baa8c502831 100644 (file)
@@ -165,7 +165,7 @@ class IqiyiIE(InfoExtractor):
     IE_NAME = 'iqiyi'
     IE_DESC = '爱奇艺'
 
-    _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html'
+    _VALID_URL = r'https?://(?:[^.]+\.)?iqiyi\.com/.+\.html'
 
     _NETRC_MACHINE = 'iqiyi'
 
index 063e86de46c896c94be505ae916fd6f3fbdedc02..158c09a33a77a00a6766a572a4a18572a883dded 100644 (file)
@@ -9,7 +9,7 @@ from .youtube import YoutubeIE
 
 
 class JadoreCettePubIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
+    _VALID_URL = r'https?://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
 
     _TEST = {
         'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
index 137db873cc09f7e57b258bcf65b8331d8b36b8c0..1a4227f6b4b0ef7370b0f09613ef9d4b8916b435 100644 (file)
@@ -8,7 +8,7 @@ from .common import InfoExtractor
 
 
 class JeuxVideoIE(InfoExtractor):
-    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
+    _VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
 
     _TESTS = [{
         'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
index 06daf5a89ce3ffde4d71d7dc8ceee9441840b72b..b4c30b7f3145fef78ec107d402c97927f1a8ad2e 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class KaraoketvIE(InfoExtractor):
-    _VALID_URL = r'http://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://karaoketv.co.il/?container=songs&id=171568',
         'info_dict': {
index bed94bc9338d158c77087d4e74ef341aa236f94f..2cb04e533d2e5c7caf5d3be062b9c0a51635cb1c 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class KarriereVideosIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
         'info_dict': {
index a59c529f4c90d8f3211a783ec5e4e2d3f7be9d84..704bd7b34554af60dfec9b811251f5270cbd1f55 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 class KontrTubeIE(InfoExtractor):
     IE_NAME = 'kontrtube'
     IE_DESC = 'KontrTube.ru - Труба зовёт'
-    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
+    _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
 
     _TEST = {
         'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
index a602980a141f3f8ccce026eaddc8b383e7894352..a574408e55b6a5ee251d94ca1d0346b9e34ac0b8 100644 (file)
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class Ku6IE(InfoExtractor):
-    _VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
+    _VALID_URL = r'https?://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
     _TEST = {
         'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html',
         'md5': '01203549b9efbb45f4b87d55bdea1ed1',
index 931f34c9b5694e3b9ba27859b0be79bbcecf1274..12cc56e444aaa63839664c8e70f82154045041c7 100644 (file)
@@ -16,7 +16,7 @@ from ..utils import (
 
 
 class KUSIIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
+    _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
     _TESTS = [{
         'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
         'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
index f94804d06865c9e5b72c936818dbce5f4bf09229..a586308b2d31e8bbac83b8446c5e00dd9b2bdce9 100644 (file)
@@ -2,13 +2,13 @@
 from __future__ import unicode_literals
 
 import re
-import itertools
 
 from .common import InfoExtractor
 from ..utils import (
     get_element_by_id,
     clean_html,
     ExtractorError,
+    InAdvancePagedList,
     remove_start,
 )
 
@@ -55,7 +55,7 @@ class KuwoBaseIE(InfoExtractor):
 class KuwoIE(KuwoBaseIE):
     IE_NAME = 'kuwo:song'
     IE_DESC = '酷我音乐'
-    _VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/'
+    _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+?)'
     _TESTS = [{
         'url': 'http://www.kuwo.cn/yinyue/635632/',
         'info_dict': {
@@ -80,6 +80,9 @@ class KuwoIE(KuwoBaseIE):
         'params': {
             'format': 'mp3-320'
         },
+    }, {
+        'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -131,7 +134,7 @@ class KuwoIE(KuwoBaseIE):
 class KuwoAlbumIE(InfoExtractor):
     IE_NAME = 'kuwo:album'
     IE_DESC = '酷我音乐 - 专辑'
-    _VALID_URL = r'http://www\.kuwo\.cn/album/(?P<id>\d+?)/'
+    _VALID_URL = r'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/'
     _TEST = {
         'url': 'http://www.kuwo.cn/album/502294/',
         'info_dict': {
@@ -167,13 +170,11 @@ class KuwoAlbumIE(InfoExtractor):
 class KuwoChartIE(InfoExtractor):
     IE_NAME = 'kuwo:chart'
     IE_DESC = '酷我音乐 - 排行榜'
-    _VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
+    _VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
     _TEST = {
         'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',
         'info_dict': {
             'id': '香港中文龙虎榜',
-            'title': '香港中文龙虎榜',
-            'description': 're:\d{4}第\d{2}期',
         },
         'playlist_mincount': 10,
     }
@@ -184,30 +185,24 @@ class KuwoChartIE(InfoExtractor):
             url, chart_id, note='Download chart info',
             errnote='Unable to get chart info')
 
-        chart_name = self._html_search_regex(
-            r'<h1[^>]+class="unDis">([^<]+)</h1>', webpage, 'chart name')
-
-        chart_desc = self._html_search_regex(
-            r'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage, 'chart desc')
-
         entries = [
             self.url_result(song_url, 'Kuwo') for song_url in re.findall(
-                r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/"', webpage)
+                r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)', webpage)
         ]
-        return self.playlist_result(entries, chart_id, chart_name, chart_desc)
+        return self.playlist_result(entries, chart_id)
 
 
 class KuwoSingerIE(InfoExtractor):
     IE_NAME = 'kuwo:singer'
     IE_DESC = '酷我音乐 - 歌手'
-    _VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
         'info_dict': {
             'id': 'bruno+mars',
             'title': 'Bruno Mars',
         },
-        'playlist_count': 10,
+        'playlist_mincount': 329,
     }, {
         'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
         'info_dict': {
@@ -218,6 +213,8 @@ class KuwoSingerIE(InfoExtractor):
         'skip': 'Regularly stalls travis build',  # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540
     }]
 
+    PAGE_SIZE = 15
+
     def _real_extract(self, url):
         singer_id = self._match_id(url)
         webpage = self._download_webpage(
@@ -225,25 +222,28 @@ class KuwoSingerIE(InfoExtractor):
             errnote='Unable to get singer info')
 
         singer_name = self._html_search_regex(
-            r'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage, 'singer name'
-        )
+            r'<h1>([^<]+)</h1>', webpage, 'singer name')
+
+        artist_id = self._html_search_regex(
+            r'data-artistid="(\d+)"', webpage, 'artist id')
+
+        page_count = int(self._html_search_regex(
+            r'data-page="(\d+)"', webpage, 'page count'))
 
-        entries = []
-        first_page_only = False if re.search(r'/music(?:_\d+)?\.htm', url) else True
-        for page_num in itertools.count(1):
+        def page_func(page_num):
             webpage = self._download_webpage(
-                'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id, page_num),
-                singer_id, note='Download song list page #%d' % page_num,
-                errnote='Unable to get song list page #%d' % page_num)
+                'http://www.kuwo.cn/artist/contentMusicsAjax',
+                singer_id, note='Download song list page #%d' % (page_num + 1),
+                errnote='Unable to get song list page #%d' % (page_num + 1),
+                query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE})
 
-            entries.extend([
+            return [
                 self.url_result(song_url, 'Kuwo') for song_url in re.findall(
-                    r'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/',
+                    r'<div[^>]+class="name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)',
                     webpage)
-            ][:10 if first_page_only else None])
+            ]
 
-            if first_page_only or not re.search(r'<a[^>]+href="[^"]+">下一页</a>', webpage):
-                break
+        entries = InAdvancePagedList(page_func, page_count, self.PAGE_SIZE)
 
         return self.playlist_result(entries, singer_id, singer_name)
 
@@ -251,7 +251,7 @@ class KuwoSingerIE(InfoExtractor):
 class KuwoCategoryIE(InfoExtractor):
     IE_NAME = 'kuwo:category'
     IE_DESC = '酷我音乐 - 分类'
-    _VALID_URL = r'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
+    _VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
     _TEST = {
         'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',
         'info_dict': {
@@ -288,7 +288,7 @@ class KuwoCategoryIE(InfoExtractor):
 class KuwoMvIE(KuwoBaseIE):
     IE_NAME = 'kuwo:mv'
     IE_DESC = '酷我音乐 - MV'
-    _VALID_URL = r'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
+    _VALID_URL = r'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
     _TEST = {
         'url': 'http://www.kuwo.cn/mv/6480076/',
         'info_dict': {
index 5d8ebbeb3d6c01fc5493751b7a89623877bba7c3..41d80bc12e69aa8ef4d54ff71ca667754d6b2409 100644 (file)
@@ -19,7 +19,7 @@ from ..utils import (
 
 
 class Laola1TvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/[^/]+/(?P<slug>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/(?P<kind>[^/]+)/(?P<slug>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
         'info_dict': {
@@ -33,7 +33,7 @@ class Laola1TvIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
     }, {
         'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',
         'info_dict': {
@@ -47,12 +47,28 @@ class Laola1TvIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+    }, {
+        'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde',
+        'info_dict': {
+            'id': '487850',
+            'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde',
+            'ext': 'flv',
+            'title': 'Belogorie BELGOROD - TRENTINO Diatec',
+            'upload_date': '20160322',
+            'uploader': 'CEV - Europäischer Volleyball Verband',
+            'is_live': True,
+            'categories': ['Volleyball'],
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         display_id = mobj.group('slug')
+        kind = mobj.group('kind')
         lang = mobj.group('lang')
         portal = mobj.group('portal')
 
@@ -85,12 +101,17 @@ class Laola1TvIE(InfoExtractor):
         _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
         title = _v('title', fatal=True)
 
+        VS_TARGETS = {
+            'video': '2',
+            'livestream': '17',
+        }
+
         req = sanitized_Request(
             'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' %
             compat_urllib_parse.urlencode({
                 'videoId': video_id,
-                'target': '2',
-                'label': 'laola1tv',
+                'target': VS_TARGETS.get(kind, '2'),
+                'label': _v('label'),
                 'area': _v('area'),
             }),
             urlencode_postdata(
index df47e88ba98a8d69d2dca43cf20ccb7366232f57..462b752dd9b85af0f845f5677bc3128eb6780590 100644 (file)
@@ -28,7 +28,7 @@ from ..utils import (
 
 class LeIE(InfoExtractor):
     IE_DESC = '乐视网'
-    _VALID_URL = r'http://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html'
 
     _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
 
@@ -196,7 +196,7 @@ class LeIE(InfoExtractor):
 
 
 class LePlaylistIE(InfoExtractor):
-    _VALID_URL = r'http://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)'
+    _VALID_URL = r'https?://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)'
 
     _TESTS = [{
         'url': 'http://www.le.com/tv/46177.html',
index a8fd639cc9276c2290f9c89dbded1bb9a1cbe7dd..ba2f80a757d071042b8d574721bde37a1b7006ba 100644 (file)
@@ -17,7 +17,7 @@ from ..utils import (
 class LifeNewsIE(InfoExtractor):
     IE_NAME = 'lifenews'
     IE_DESC = 'LIFE | NEWS'
-    _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
+    _VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
 
     _TESTS = [{
         # single video embedded via video/source
@@ -159,7 +159,7 @@ class LifeNewsIE(InfoExtractor):
 
 class LifeEmbedIE(InfoExtractor):
     IE_NAME = 'life:embed'
-    _VALID_URL = r'http://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
+    _VALID_URL = r'https?://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
 
     _TEST = {
         'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
index 1a0625ac3e0eeefa5ca18e968ae69e210a960662..2599d45c37e3c7874e12227677962fae3a2fbf84 100644 (file)
@@ -123,7 +123,7 @@ class LimelightBaseIE(InfoExtractor):
 
 class LimelightMediaIE(LimelightBaseIE):
     IE_NAME = 'limelight'
-    _VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'
+    _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'
     _TESTS = [{
         'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
         'info_dict': {
@@ -176,7 +176,7 @@ class LimelightMediaIE(LimelightBaseIE):
 
 class LimelightChannelIE(LimelightBaseIE):
     IE_NAME = 'limelight:channel'
-    _VALID_URL = r'(?:limelight:channel:|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'
+    _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'
     _TEST = {
         'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
         'info_dict': {
@@ -207,7 +207,7 @@ class LimelightChannelIE(LimelightBaseIE):
 
 class LimelightChannelListIE(LimelightBaseIE):
     IE_NAME = 'limelight:channel_list'
-    _VALID_URL = r'(?:limelight:channel_list:|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
+    _VALID_URL = r'(?:limelight:channel_list:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
     _TEST = {
         'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
         'info_dict': {
index 7e025831b51d611f00e248bda637b4ae8f35efb6..d5945ad66b3a784263fb1c5106534081b1f04913 100644 (file)
@@ -8,7 +8,7 @@ from .common import InfoExtractor
 
 class M6IE(InfoExtractor):
     IE_NAME = 'm6'
-    _VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
+    _VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
 
     _TEST = {
         'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
index 71085f279fb4b9d2a40d866785e75d6d6c419674..46eb004925b88459b2c4859790aa06059a3b61a6 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 class MailRuIE(InfoExtractor):
     IE_NAME = 'mailru'
     IE_DESC = 'Видео@Mail.Ru'
-    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
+    _VALID_URL = r'https?://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
 
     _TESTS = [
         {
index 67d6271e1ad107aceddaa2c8b4bd96558426bfce..c31e8798ae7ccdbaa8ef9baa481336ef8a5e8dd9 100644 (file)
@@ -17,7 +17,7 @@ from ..utils import (
 
 
 class MetacafeIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
+    _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
     _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
     _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
     IE_NAME = 'metacafe'
index 819c1b90bb755c873b3f7f1b64e07dc97126a9b9..1aea78d118a84a135494214da54c3c2c21465bc9 100644 (file)
@@ -91,7 +91,7 @@ class MITIE(TechTVMITIE):
 
 class OCWMITIE(InfoExtractor):
     IE_NAME = 'ocw.mit.edu'
-    _VALID_URL = r'^http://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
+    _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
     _BASE_URL = 'http://ocw.mit.edu/'
 
     _TESTS = [
index c595f20775efd8e4aed348e4886ac55209e2c6e2..9e584860a2e4ac98596093b701910c0060d978b9 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 class MiTeleIE(InfoExtractor):
     IE_DESC = 'mitele.es'
-    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
+    _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
 
     _TESTS = [{
         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
index 7cc7f054f6bba16b0ea44554de4a515bf7020342..f010f52d50b40685000c8eab0e20b1373badcc9d 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 class MooshareIE(InfoExtractor):
     IE_NAME = 'mooshare'
     IE_DESC = 'Mooshare.biz'
-    _VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
+    _VALID_URL = r'https?://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
 
     _TESTS = [
         {
index 0b4787c1d23daa2ed8ed4065df9baa9ab706521b..5e1a8a71a93aa28962d7f260af966d10cf8e9f7a 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class MotherlessIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
     _TESTS = [{
         'url': 'http://motherless.com/AC3FFE1',
         'md5': '310f62e325a9fafe64f68c0bccb6e75f',
@@ -69,6 +69,9 @@ class MotherlessIE(InfoExtractor):
                 ">The page you're looking for cannot be found.<")):
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
+        if '>The content you are trying to view is for friends only.' in webpage:
+            raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
+
         title = self._html_search_regex(
             r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
         video_url = self._html_search_regex(
index c1a482dba39fb98efdb28e85b681565eb58e3f9e..370328b362c2a0661925d054be121a7216dc94c7 100644 (file)
@@ -9,7 +9,7 @@ from ..compat import (
 
 class MotorsportIE(InfoExtractor):
     IE_DESC = 'motorsport.com'
-    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
+    _VALID_URL = r'https?://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
     _TEST = {
         'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
         'info_dict': {
index f936b92bbdbb031485fd66fb8dc802b3ba9ff9b3..1ca7b1a9e958c221f44c48bced04c314c0957f8c 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class MySpassIE(InfoExtractor):
-    _VALID_URL = r'http://www\.myspass\.de/.*'
+    _VALID_URL = r'https?://www\.myspass\.de/.*'
     _TEST = {
         'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
         'md5': '0b49f4844a068f8b33f4b7c88405862b',
index 1e21cf98a9a415a33a4bb563dd296928c27426b6..c83a1eab5ba6b90229422bce078c9ae170fa5069 100644 (file)
@@ -20,7 +20,7 @@ from ..utils import (
 
 class MyVideoIE(InfoExtractor):
     _WORKING = False
-    _VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
+    _VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
     IE_NAME = 'myvideo'
     _TEST = {
         'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
index a94ab8358cacc51094ab791ace648ec062eb5f94..731c245428103b3ea96f5c396b063afadac82702 100644 (file)
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 
 class MyVidsterIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'
+    _VALID_URL = r'https?://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'
 
     _TEST = {
         'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making',
index 7ce8d9b182ca78401e772ebef5311a37c7819ee0..d5e53365cc52d93da99953a774871e862ca3cc2a 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import (
 
 
 class NationalGeographicIE(InfoExtractor):
-    _VALID_URL = r'http://video\.nationalgeographic\.com/.*?'
+    _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'
 
     _TESTS = [
         {
index bb0817e34ea98edb641580d356b901fee2200c7b..a622f2212d8af38519b2906f6b27d5c0ad0dac57 100644 (file)
@@ -115,7 +115,7 @@ class NBCSportsVPlayerIE(InfoExtractor):
 
 class NBCSportsIE(InfoExtractor):
     # Does not include https because its certificate is invalid
-    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'https?://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
 
     _TEST = {
         'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
@@ -295,7 +295,7 @@ class NBCNewsIE(ThePlatformIE):
 
 class MSNBCIE(InfoExtractor):
     # https URLs redirect to corresponding http ones
-    _VALID_URL = r'http://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
     _TEST = {
         'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
         'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
index d1688457f28d298e7de2921dcf00e3fba6c087a2..aae7aeeebb8e2adebd2669bcd899caec3432275d 100644 (file)
@@ -7,7 +7,7 @@ from ..utils import parse_iso8601
 
 class NextMediaIE(InfoExtractor):
     IE_DESC = '蘋果日報'
-    _VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
         'md5': 'dff9fad7009311c421176d1ac90bfe4f',
@@ -68,7 +68,7 @@ class NextMediaIE(InfoExtractor):
 
 class NextMediaActionNewsIE(NextMediaIE):
     IE_DESC = '蘋果日報 - 動新聞'
-    _VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
+    _VALID_URL = r'https?://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
     _TESTS = [{
         'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
         'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
@@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE):
 
 class AppleDailyIE(NextMediaIE):
     IE_DESC = '臺灣蘋果日報'
-    _VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+    _VALID_URL = r'https?://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
     _TESTS = [{
         'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
         'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
index d440313d545b18723614d8eb9b8dc738a4cf476c..ec7317a2f6d16911562285871167786e31902c62 100644 (file)
@@ -22,7 +22,7 @@ from ..utils import (
 
 
 class NocoIE(InfoExtractor):
-    _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
     _LOGIN_URL = 'http://noco.tv/do.php'
     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
     _SUB_LANG_TEMPLATE = '&sub_lang=%s'
index 5952d136f7b3efd3e9f91843ba12de6a13d989ba..77e09107299824f5ae4063817d73e505e893c2af 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class NormalbootsIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
+    _VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
     _TEST = {
         'url': 'http://normalboots.com/video/home-alone-games-jontron/',
         'md5': '8bf6de238915dd501105b44ef5f1e0f6',
index 3f9c776ef665ab47624eeab7ba60f5754dbf213e..17671ad398b9e9a8148bceff74db678969d26d3f 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class NovaIE(InfoExtractor):
     IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
-    _VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
+    _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
     _TESTS = [{
         'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',
         'info_dict': {
index 125c7010b9206bce25688c5c3cbad576753f70f0..a3f0abb4eda4afdfb7afb5e6dec168874431e716 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import (
 
 
 class NprIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205',
         'info_dict': {
index 2cd924d059dafd9aa3734697c9c4a396b2bb01f6..0895d7ea4cb88f805605a55cb0c1fe56ff1d475d 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 class NTVRuIE(InfoExtractor):
     IE_NAME = 'ntv.ru'
-    _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?P<id>.+)'
 
     _TESTS = [
         {
index 080045d4c1446875cb40fdf85c516e1e4cc419a3..5db949b176141863a290adfcd1fe3bbda11076b7 100644 (file)
@@ -20,6 +20,10 @@ class OnceIE(InfoExtractor):
             media_item_id, 'mp4', m3u8_id='hls', fatal=False)
         progressive_formats = []
         for adaptive_format in formats:
+            # Prevent advertisement from embedding into m3u8 playlist (see
+            # https://github.com/rg3/youtube-dl/issues/8893#issuecomment-199912684)
+            adaptive_format['url'] = re.sub(
+                r'\badsegmentlength=\d+', r'adsegmentlength=0', adaptive_format['url'])
             rendition_id = self._search_regex(
                 r'/now/media/playlist/[^/]+/[^/]+/([^/]+)',
                 adaptive_format['url'], 'redition id', default=None)
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
new file mode 100644 (file)
index 0000000..4468f31
--- /dev/null
@@ -0,0 +1,107 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_chr
+from ..utils import (
+    encode_base_n,
+    ExtractorError,
+)
+
+
+class OpenloadIE(InfoExtractor):
+    _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-]+)'
+
+    _TESTS = [{
+        'url': 'https://openload.co/f/kUEfGclsU9o',
+        'md5': 'bf1c059b004ebc7a256f89408e65c36e',
+        'info_dict': {
+            'id': 'kUEfGclsU9o',
+            'ext': 'mp4',
+            'title': 'skyrim_no-audio_1080.mp4',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
+        'only_matching': True,
+    }, {
+        'url': 'https://openload.io/f/ZAn6oz-VZGE/',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def openload_level2_debase(m):
+        radix, num = int(m.group(1)) + 27, int(m.group(2))
+        return '"' + encode_base_n(num, radix) + '"'
+
+    @classmethod
+    def openload_level2(cls, txt):
+        # The function name is ǃ \u01c3
+        # Using escaped unicode literals does not work in Python 3.2
+        return re.sub(r'ǃ\((\d+),(\d+)\)', cls.openload_level2_debase, txt, re.UNICODE).replace('"+"', '')
+
+    # Openload uses a variant of aadecode
+    # openload_decode and related functions are originally written by
+    # vitas@matfyz.cz and released with public domain
+    # See https://github.com/rg3/youtube-dl/issues/8489
+    @classmethod
+    def openload_decode(cls, txt):
+        symbol_table = [
+            ('_', '(゚Д゚) [゚Θ゚]'),
+            ('a', '(゚Д゚) [゚ω゚ノ]'),
+            ('b', '(゚Д゚) [゚Θ゚ノ]'),
+            ('c', '(゚Д゚) [\'c\']'),
+            ('d', '(゚Д゚) [゚ー゚ノ]'),
+            ('e', '(゚Д゚) [゚Д゚ノ]'),
+            ('f', '(゚Д゚) [1]'),
+
+            ('o', '(゚Д゚) [\'o\']'),
+            ('u', '(o゚ー゚o)'),
+            ('c', '(゚Д゚) [\'c\']'),
+
+            ('7', '((゚ー゚) + (o^_^o))'),
+            ('6', '((o^_^o) +(o^_^o) +(c^_^o))'),
+            ('5', '((゚ー゚) + (゚Θ゚))'),
+            ('4', '(-~3)'),
+            ('3', '(-~-~1)'),
+            ('2', '(-~1)'),
+            ('1', '(-~0)'),
+            ('0', '((c^_^o)-(c^_^o))'),
+        ]
+        delim = '(゚Д゚)[゚ε゚]+'
+        ret = ''
+        for aachar in txt.split(delim):
+            for val, pat in symbol_table:
+                aachar = aachar.replace(pat, val)
+            aachar = aachar.replace('+ ', '')
+            m = re.match(r'^\d+', aachar)
+            if m:
+                ret += compat_chr(int(m.group(0), 8))
+            else:
+                m = re.match(r'^u([\da-f]+)', aachar)
+                if m:
+                    ret += compat_chr(int(m.group(1), 16))
+        return cls.openload_level2(ret)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        if 'File not found' in webpage:
+            raise ExtractorError('File not found', expected=True)
+
+        code = self._search_regex(
+            r'<video[^>]+>\s*<script[^>]+>([^<]+)</script>',
+            webpage, 'JS code')
+
+        video_url = self._search_regex(
+            r'return\s+"(https?://[^"]+)"', self.openload_decode(code), 'video URL')
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'url': video_url,
+        }
index 958eb398b992e4d3b4278ebe362f71074e87c19b..66c75f8b3559752127c091d437e4764b7c722e9d 100644 (file)
@@ -137,7 +137,7 @@ class ORFTVthekIE(InfoExtractor):
 class ORFOE1IE(InfoExtractor):
     IE_NAME = 'orf:oe1'
     IE_DESC = 'Radio Österreich 1'
-    _VALID_URL = r'http://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'
 
     # Audios on ORF radio are only available for 7 days, so we can't add tests.
     _TEST = {
@@ -171,7 +171,7 @@ class ORFOE1IE(InfoExtractor):
 class ORFFM4IE(InfoExtractor):
     IE_NAME = 'orf:fm4'
     IE_DESC = 'radio FM4'
-    _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)'
+    _VALID_URL = r'https?://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)'
 
     _TEST = {
         'url': 'http://fm4.orf.at/player/20160110/IS/',
@@ -222,7 +222,7 @@ class ORFFM4IE(InfoExtractor):
 class ORFIPTVIE(InfoExtractor):
     IE_NAME = 'orf:iptv'
     IE_DESC = 'iptv.ORF.at'
-    _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
+    _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://iptv.orf.at/stories/2275236/',
index 6e60e5fe98920c64d310c3610194d98b01790ceb..f1008ae514f78f6c843e399031135afb00f5f23f 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class PhilharmonieDeParisIE(InfoExtractor):
     IE_DESC = 'Philharmonie de Paris'
-    _VALID_URL = r'http://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
+    _VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
         'info_dict': {
index 788411ccc18082f59588d40704900c26dba1fe21..6c8bbe1d95c3c4972baa4c956ad1a62ef6518e2d 100644 (file)
@@ -8,7 +8,7 @@ from ..compat import compat_urllib_parse_unquote
 
 
 class PhotobucketIE(InfoExtractor):
-    _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
+    _VALID_URL = r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
     _TEST = {
         'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
         'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
index 3e15533e9d445e96884974af398e0461438bcc8b..63ce87ee358004cbfae87b0744bdb02329f31372 100644 (file)
@@ -1,7 +1,10 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-from ..compat import compat_urllib_parse
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_parse_unquote,
+)
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
@@ -28,9 +31,10 @@ class Porn91IE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id
         self._set_cookie('91porn.com', 'language', 'cn_CN')
-        webpage = self._download_webpage(url, video_id, 'get HTML content')
+
+        webpage = self._download_webpage(
+            'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
 
         if '作为游客,你每天只可观看10个视频' in webpage:
             raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True)
@@ -54,8 +58,9 @@ class Porn91IE(InfoExtractor):
         })
         info_cn = self._download_webpage(
             'http://91porn.com/getfile.php?' + url_params, video_id,
-            'get real video url')
-        video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url')
+            'Downloading real video url')
+        video_url = compat_urllib_parse_unquote(self._search_regex(
+            r'file=([^&]+)&', info_cn, 'url'))
 
         duration = parse_duration(self._search_regex(
             r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False))
index 57c78ba52a994a9c2aff224470b86b913702241f..39b53ecf68c77786f18956040bf7ccac4fd6dbc5 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class PornHdIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
+    _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
     _TEST = {
         'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
         'md5': '956b8ca569f7f4d8ec563e2c41598441',
index 1a53fd71c068626e3d91e737c9b295f4e0a1b0a9..6b51e5c5400ee59859eb0d29cb740a31f34f3a96 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class PornoVoisinesIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
 
     _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
         '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
index 30a5f2de4475a934cfa467764d0ce559d3e68a74..cc0416cb81eb23ed87d1dae0cdf2573a6df8936a 100644 (file)
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 
 
 class PyvideoIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
+    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
 
     _TESTS = [
         {
index 45a3c41c59438b8784434fb2c5dbfb9cef3c1674..ff0af9543c2b5e5527f406958e9ae5ae4d1adbda 100644 (file)
@@ -18,7 +18,7 @@ from ..utils import (
 class QQMusicIE(InfoExtractor):
     IE_NAME = 'qqmusic'
     IE_DESC = 'QQ音乐'
-    _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
     _TESTS = [{
         'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
         'md5': '9ce1c1c8445f561506d2e3cfb0255705',
@@ -172,7 +172,7 @@ class QQPlaylistBaseIE(InfoExtractor):
 class QQMusicSingerIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:singer'
     IE_DESC = 'QQ音乐 - 歌手'
-    _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
     _TEST = {
         'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
         'info_dict': {
@@ -217,7 +217,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):
 class QQMusicAlbumIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:album'
     IE_DESC = 'QQ音乐 - 专辑'
-    _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+    _VALID_URL = r'https?://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
 
     _TESTS = [{
         'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',
@@ -260,7 +260,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
 class QQMusicToplistIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:toplist'
     IE_DESC = 'QQ音乐 - 排行榜'
-    _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
+    _VALID_URL = r'https?://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
 
     _TESTS = [{
         'url': 'http://y.qq.com/#type=toplist&p=global_123',
@@ -314,7 +314,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
 class QQMusicPlaylistIE(QQPlaylistBaseIE):
     IE_NAME = 'qqmusic:playlist'
     IE_DESC = 'QQ音乐 - 歌单'
-    _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
 
     _TESTS = [{
         'url': 'http://y.qq.com/#type=taoge&id=3462654915',
index a4dc5c335e152ce47424a73263dc208d942d31ab..e36ce1aa1940deafd5a633bec814e7462008c3b1 100644 (file)
@@ -18,7 +18,7 @@ from ..utils import (
 
 
 class RaiTVIE(InfoExtractor):
-    _VALID_URL = r'http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
+    _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
     _TESTS = [
         {
             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
@@ -175,7 +175,7 @@ class RaiTVIE(InfoExtractor):
 
 
 class RaiIE(InfoExtractor):
-    _VALID_URL = r'http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
+    _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
     _TESTS = [
         {
             'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
index d6054d7175fd49a22117dd357bea7905f6e739be..7ba41ba593295cdc7d2e28e6b64702321ed1ef08 100644 (file)
@@ -5,7 +5,7 @@ from ..utils import ExtractorError
 
 
 class RedTubeIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.redtube.com/66418',
         'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
index 50875807577d076cd81c2021aa7695334ef272da..2c2c707bd36ad3f737072bf1f9011027e0514bd9 100644 (file)
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 
 class RingTVIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
     _TEST = {
         'url': 'http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30',
         'md5': 'd25945f5df41cdca2d2587165ac28720',
index 8a8c5d2a0e2b27bd6e476ef7d848753fbe648dd5..08cd1ae6cd3a138d6a12ca147cdca51613922f6e 100644 (file)
@@ -62,7 +62,7 @@ def _decrypt_url(png):
 class RTVEALaCartaIE(InfoExtractor):
     IE_NAME = 'rtve.es:alacarta'
     IE_DESC = 'RTVE a la carta'
-    _VALID_URL = r'http://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
@@ -179,7 +179,7 @@ class RTVEInfantilIE(InfoExtractor):
 class RTVELiveIE(InfoExtractor):
     IE_NAME = 'rtve.es:live'
     IE_DESC = 'RTVE.es live streams'
-    _VALID_URL = r'http://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
+    _VALID_URL = r'https?://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
 
     _TESTS = [{
         'url': 'http://www.rtve.es/directo/la-1/',
index 0e470e73f538fd60d7ed34cbe515042f6abc078b..1f7c262993c8ce7e0d602f612fc6316e80052f66 100644 (file)
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class RUHDIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.ruhd.ru/play.php?vid=207',
         'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83',
index c5c47d01ecef917ce037fc0ede75dd2b5e770138..9ca4ae147cb1e3c430de3abd9fd0927aaee2ed5a 100644 (file)
@@ -122,7 +122,7 @@ class RutubeEmbedIE(InfoExtractor):
 class RutubeChannelIE(InfoExtractor):
     IE_NAME = 'rutube:channel'
     IE_DESC = 'Rutube channels'
-    _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
+    _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://rutube.ru/tags/video/1800/',
         'info_dict': {
@@ -156,7 +156,7 @@ class RutubeChannelIE(InfoExtractor):
 class RutubeMovieIE(RutubeChannelIE):
     IE_NAME = 'rutube:movie'
     IE_DESC = 'Rutube movies'
-    _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+    _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
     _TESTS = []
 
     _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
@@ -174,7 +174,7 @@ class RutubeMovieIE(RutubeChannelIE):
 class RutubePersonIE(RutubeChannelIE):
     IE_NAME = 'rutube:person'
     IE_DESC = 'Rutube person videos'
-    _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
+    _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://rutube.ru/video/person/313878/',
         'info_dict': {
index f7fe1feceeccbb3c7dc6860f0ee5d6a9c10a57df..a2379eb04c2e6744a49f315ebee2a0c9fb0170f6 100644 (file)
@@ -14,7 +14,7 @@ class RUTVIE(InfoExtractor):
     IE_DESC = 'RUTV.RU'
     _VALID_URL = r'''(?x)
         https?://player\.(?:rutv\.ru|vgtrk\.com)/
-            (?P<path>flash2v/container\.swf\?id=
+            (?P<path>flash\d+v/container\.swf\?id=
             |iframe/(?P<type>swf|video|live)/id/
             |index/iframe/cast_id/)
             (?P<id>\d+)'''
@@ -109,7 +109,7 @@ class RUTVIE(InfoExtractor):
             return mobj.group('url')
 
         mobj = re.search(
-            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
+            r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
             webpage)
         if mobj:
             return mobj.group('url')
@@ -119,7 +119,7 @@ class RUTVIE(InfoExtractor):
         video_id = mobj.group('id')
         video_path = mobj.group('path')
 
-        if video_path.startswith('flash2v'):
+        if re.match(r'flash\d+v', video_path):
             video_type = 'video'
         elif video_path.startswith('iframe'):
             video_type = mobj.group('type')
@@ -168,7 +168,7 @@ class RUTVIE(InfoExtractor):
                         'play_path': mobj.group('playpath'),
                         'app': mobj.group('app'),
                         'page_url': 'http://player.rutv.ru',
-                        'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
+                        'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
                         'rtmp_live': True,
                         'ext': 'flv',
                         'vbr': int(quality),
index 256396bb8c21174fdaca4524c0b21d1ef7f802a9..6ba91f202baadbfd72160cc739efde868a60d421 100644 (file)
@@ -75,16 +75,7 @@ class SafariBaseIE(InfoExtractor):
 class SafariIE(SafariBaseIE):
     IE_NAME = 'safari'
     IE_DESC = 'safaribooksonline.com online video'
-    _VALID_URL = r'''(?x)https?://
-                            (?:www\.)?safaribooksonline\.com/
-                                (?:
-                                    library/view/[^/]+|
-                                    api/v1/book
-                                )/
-                                (?P<course_id>[^/]+)/
-                                    (?:chapter(?:-content)?/)?
-                                (?P<part>part\d+)\.html
-    '''
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html'
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
@@ -97,9 +88,6 @@ class SafariIE(SafariBaseIE):
             'upload_date': '20150724',
             'uploader_id': 'stork',
         },
-    }, {
-        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
-        'only_matching': True,
     }, {
         # non-digits in course id
         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
@@ -108,13 +96,18 @@ class SafariIE(SafariBaseIE):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        course_id = mobj.group('course_id')
-        part = mobj.group('part')
-
-        webpage = self._download_webpage(url, '%s/%s' % (course_id, part))
-        reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id')
-        partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id')
-        ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id')
+        video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part'))
+
+        webpage = self._download_webpage(url, video_id)
+        reference_id = self._search_regex(
+            r'data-reference-id=(["\'])(?P<id>.+?)\1',
+            webpage, 'kaltura reference id', group='id')
+        partner_id = self._search_regex(
+            r'data-partner-id=(["\'])(?P<id>.+?)\1',
+            webpage, 'kaltura widget id', group='id')
+        ui_id = self._search_regex(
+            r'data-ui-id=(["\'])(?P<id>.+?)\1',
+            webpage, 'kaltura uiconf id', group='id')
 
         query = {
             'wid': '_%s' % partner_id,
@@ -125,7 +118,7 @@ class SafariIE(SafariBaseIE):
         if self.LOGGED_IN:
             kaltura_session = self._download_json(
                 '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
-                course_id, 'Downloading kaltura session JSON',
+                video_id, 'Downloading kaltura session JSON',
                 'Unable to download kaltura session JSON', fatal=False)
             if kaltura_session:
                 session = kaltura_session.get('session')
@@ -137,6 +130,23 @@ class SafariIE(SafariBaseIE):
             'Kaltura')
 
 
+class SafariApiIE(SafariBaseIE):
+    IE_NAME = 'safari:api'
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html'
+
+    _TEST = {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        # The API URL yields JSON; hand its 'web_url' over to SafariIE
+        course_id, part_name = re.match(self._VALID_URL, url).group('course_id', 'part')
+        part_json = self._download_json(
+            url, '%s/%s' % (course_id, part_name), 'Downloading part JSON')
+        return self.url_result(part_json['web_url'], SafariIE.ie_key())
+
+
 class SafariCourseIE(SafariBaseIE):
     IE_NAME = 'safari:course'
     IE_DESC = 'safaribooksonline.com online courses'
@@ -168,7 +178,7 @@ class SafariCourseIE(SafariBaseIE):
                 'No chapters found for course %s' % course_id, expected=True)
 
         entries = [
-            self.url_result(chapter, 'Safari')
+            self.url_result(chapter, SafariApiIE.ie_key())
             for chapter in course_json['chapters']]
 
         course_title = course_json['title']
index f2af15f6b43ef5cd205db383cd01c283069dd05b..dd0a6ba19d4ef3b9397af6d977277256cbc0e1e9 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class ScreenJunkiesIE(InfoExtractor):
-    _VALID_URL = r'http://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
+    _VALID_URL = r'https?://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
     _TESTS = [{
         'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915',
         'md5': '5c2b686bec3d43de42bde9ec047536b0',
index 4d3b585228570769f1c4ece0955ff2e6c0b6b73c..c5f474dd1d8a5040a5368de7f2aa050658f7a984 100644 (file)
@@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor):
         ['arch', '', 'http://ussenate-f.akamaihd.net/']
     ]
     _IE_NAME = 'senate.gov'
-    _VALID_URL = r'http://www\.senate\.gov/isvp/?\?(?P<qs>.+)'
+    _VALID_URL = r'https?://www\.senate\.gov/isvp/?\?(?P<qs>.+)'
     _TESTS = [{
         'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
         'info_dict': {
index 8eda3c8648a093213426437e151d217a1115da92..96fe0b90d91b080218ca387d8a4403130171358d 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 class SharedIE(InfoExtractor):
     IE_DESC = 'shared.sx and vivo.sx'
-    _VALID_URL = r'http://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'
+    _VALID_URL = r'https?://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'
 
     _TESTS = [{
         'url': 'http://shared.sx/0060718775',
index dfe50ed4585b0fe876b8a300edd00a453ae4b690..7e67833062d0a21d2c663b1b5d24246d653f0116 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import ExtractorError
 
 
 class Sport5IE(InfoExtractor):
-    _VALID_URL = r'http://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1',
index 13101c7146244181f62a634b773da446e2f5e79a..54d1843f2200d0cef7fa2e7b192f673d316c5f18 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import (
 
 
 class SSAIE(InfoExtractor):
-    _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
+    _VALID_URL = r'https?://ssa\.nls\.uk/film/(?P<id>\d+)'
     _TEST = {
         'url': 'http://ssa.nls.uk/film/3561',
         'info_dict': {
index aa5964acb6b3f40b0d663bd2169ac6aec0c210ae..f562aa6d386ee891f4ab3a724bef53e20a6cec92 100644 (file)
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class SztvHuIE(InfoExtractor):
-    _VALID_URL = r'http://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
         'md5': 'a6df607b11fb07d0e9f2ad94613375cb',
index d1b7264b4ca4a0cb72e491da26d7f5bbc1cc66b7..b49ab5f5b98c2d6219d1d17a1c0aea02eb534f61 100644 (file)
@@ -16,7 +16,7 @@ from ..compat import compat_ord
 
 
 class TeamcocoIE(InfoExtractor):
-    _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
+    _VALID_URL = r'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
     _TESTS = [
         {
             'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
index 4e860db0a906f7892a2f155483bd66f7e34cecc2..a29a64b6d5d2fbcec5667902eba259cefb679125 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class Tele13IE(InfoExtractor):
-    _VALID_URL = r'^http://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
+    _VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
     _TESTS = [
         {
             'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
index 9ee84468488c61979f35b527dad0b617133f04b2..3f54b2744cb16cd6385e5cb06919cbaf9628167a 100644 (file)
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 class TF1IE(InfoExtractor):
     """TF1 uses the wat.tv player."""
-    _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
+    _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
     _TESTS = [{
         'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
         'info_dict': {
diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py
new file mode 100644 (file)
index 0000000..08d666e
--- /dev/null
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..compat import compat_urlparse
+from ..utils import qualities
+
+
+class TheSceneIE(InfoExtractor):
+    """Extractor for thescene.com watch pages."""
+
+    _VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)'
+
+    _TEST = {
+        'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear',
+        'info_dict': {
+            'id': '520e8faac2b4c00e3c6e5f43',
+            'ext': 'mp4',
+            'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear',
+            'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict from the player script referenced by the page."""
+        # The URL slug is only a display id; the real id comes from the
+        # player script's JSON below.
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        # Resolve the script src against the page URL (handles a relative src).
+        # compat_urlparse (not compat_urllib_parse) is the module that
+        # provides urljoin on both Python 2 and Python 3.
+        player_url = compat_urlparse.urljoin(
+            url,
+            self._html_search_regex(
+                r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url'))
+
+        player = self._download_webpage(player_url, display_id)
+        # The script assigns the video metadata as an object literal:
+        # "var video = {...};"
+        info = self._parse_json(
+            self._search_regex(
+                r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'),
+            display_id)
+
+        qualities_order = qualities(['low', 'high'])
+        formats = [{
+            'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']),
+            'url': f['src'],
+            'quality': qualities_order(f['quality']),
+        } for f in info['sources'][0]]
+        self._sort_formats(formats)
+
+        return {
+            'id': info['id'],
+            'title': info['title'],
+            'formats': formats,
+            'thumbnail': info.get('poster_frame'),
+            'display_id': display_id,
+        }
index 496f15d80b478f94bc2aac86c3d20417e2b09925..406f4a826623c0335e973a0f6dd79744fb32e982 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import (
 
 
 class THVideoIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://thvideo.tv/v/th1987/',
         'md5': 'fa107b1f73817e325e9433505a70db50',
index e036b8cdf1e6ca6ad4277a4c3d22e79361322703..c43cace24d5bfd107328944d0bd290594ec06b3f 100644 (file)
@@ -9,7 +9,7 @@ from ..utils import ExtractorError
 class TinyPicIE(InfoExtractor):
     IE_NAME = 'tinypic'
     IE_DESC = 'tinypic.com videos'
-    _VALID_URL = r'http://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
+    _VALID_URL = r'https?://(?:.+?\.)?tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
 
     _TESTS = [
         {
index 17add954338c1f5d70640577f7e75a8afab5c7e3..abad3ff64b5e519414615d3dd3cf8da345e9a2f3 100644 (file)
@@ -9,7 +9,7 @@ from ..compat import compat_parse_qs
 
 class TlcDeIE(InfoExtractor):
     IE_NAME = 'tlc.de'
-    _VALID_URL = r'http://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
+    _VALID_URL = r'https?://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
 
     _TEST = {
         'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
index 2756f56d3a94ae8f2bed64aa39acf4d45616366b..2579ba8c67498c91aa117c6853b83f391ccb3ba6 100644 (file)
@@ -41,7 +41,7 @@ class ToypicsIE(InfoExtractor):
 
 class ToypicsUserIE(InfoExtractor):
     IE_DESC = 'Toypics user profile'
-    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+    _VALID_URL = r'https?://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
     _TEST = {
         'url': 'http://videos.toypics.net/Mikey',
         'info_dict': {
index 0e01b15fcc51ed41d6ace902058e8446ad5625fd..747370d12d7fc8fd1c66b1ac101db0ba01c963e5 100644 (file)
@@ -7,7 +7,7 @@ from .common import InfoExtractor
 
 class TrailerAddictIE(InfoExtractor):
     _WORKING = False
-    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
+    _VALID_URL = r'(?:https?://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
     _TEST = {
         'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
         'md5': '41365557f3c8c397d091da510e73ceb4',
index d239949a668a3cfb1d3267bf0be8780c2bda6569..65770562309186acc95c33c2165d3d50ae8f5f36 100644 (file)
@@ -7,7 +7,7 @@ from .nuevo import NuevoBaseIE
 
 
 class TrollvidsIE(NuevoBaseIE):
-    _VALID_URL = r'http://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
     IE_NAME = 'trollvids'
     _TEST = {
         'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff',
index f56b66d06f2f7323b60d2fc4788d26cb3274ef9e..9892e8a62bf9731b8d3e5768333f71baaecd6288 100644 (file)
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     int_or_none,
+    InAdvancePagedList,
     float_or_none,
     unescapeHTML,
 )
@@ -75,15 +76,16 @@ class TudouIE(InfoExtractor):
         quality = sorted(filter(lambda k: k.isdigit(), segments.keys()),
                          key=lambda k: int(k))[-1]
         parts = segments[quality]
-        result = []
         len_parts = len(parts)
         if len_parts > 1:
             self.to_screen('%s: found %s parts' % (video_id, len_parts))
-        for part in parts:
+
+        def part_func(partnum):
+            part = parts[partnum]
             part_id = part['k']
             final_url = self._url_for_id(part_id, quality)
             ext = (final_url.split('?')[0]).split('.')[-1]
-            part_info = {
+            return [{
                 'id': '%s' % part_id,
                 'url': final_url,
                 'ext': ext,
@@ -97,12 +99,13 @@ class TudouIE(InfoExtractor):
                 'http_headers': {
                     'Referer': self._PLAYER_URL,
                 },
-            }
-            result.append(part_info)
+            }]
+
+        entries = InAdvancePagedList(part_func, len_parts, 1)
 
         return {
             '_type': 'multi_video',
-            'entries': result,
+            'entries': entries,
             'id': video_id,
             'title': title,
         }
index 4f844706d365950d6e1d04e237a5c20df7a1cafc..cea117c79407ed9a3f8cac283718f392746c7a06 100644 (file)
@@ -8,7 +8,7 @@ from ..utils import int_or_none
 
 
 class TumblrIE(InfoExtractor):
-    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
+    _VALID_URL = r'https?://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
     _TESTS = [{
         'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
         'md5': '479bb068e5b16462f5176a6828829767',
index 8322cc14da821f4615a4a8038904039b01c18827..ae4cfaec29b493c3b8b8e11705629901a07a2bf2 100644 (file)
@@ -1,7 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
+import re
 
 from .common import InfoExtractor
 from ..utils import ExtractorError
@@ -27,10 +27,9 @@ class TuneInBaseIE(InfoExtractor):
         if not streams_url.startswith('http://'):
             streams_url = compat_urlparse.urljoin(url, streams_url)
 
-        stream_data = self._download_webpage(
-            streams_url, content_id, note='Downloading stream data')
-        streams = json.loads(self._search_regex(
-            r'\((.*)\);', stream_data, 'stream info'))['Streams']
+        streams = self._download_json(
+            streams_url, content_id, note='Downloading stream data',
+            transform_source=lambda s: re.sub(r'^\s*\((.*)\);\s*$', r'\1', s))['Streams']
 
         is_live = None
         formats = []
index 1457e524e810c8bda02795c1b8dd78e95c47802e..86bb7915db170ecf4c75fdc2b960160a65daa1c0 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class TV2IE(InfoExtractor):
-    _VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.tv2.no/v/916509/',
         'info_dict': {
@@ -100,7 +100,7 @@ class TV2IE(InfoExtractor):
 
 
 class TV2ArticleIE(InfoExtractor):
-    _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
         'info_dict': {
index 3a4f393fcf6d79f3f42970db7aab853d5efedf84..4065354ddde2c63698908dfac81dc98cac77e79d 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class TVCIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702',
         'md5': 'bbc5ff531d1e90e856f60fc4b3afd708',
@@ -64,7 +64,7 @@ class TVCIE(InfoExtractor):
 
 
 class TVCArticleIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)'
     _TESTS = [{
         'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/',
         'info_dict': {
index b4683de542dffbcc839f162d282285b3cb5d02f3..df70a6b230a4217261f3f69a3e1213a88f07afbf 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 class TVPlayIE(InfoExtractor):
     IE_DESC = 'TV3Play and related services'
-    _VALID_URL = r'''(?x)http://(?:www\.)?
+    _VALID_URL = r'''(?x)https?://(?:www\.)?
         (?:tvplay\.lv/parraides|
            tv3play\.lt/programos|
            play\.tv3\.lt/programos|
index d5023775857a8bd27ee0f19cffd8176a477be4fd..1d52cbc98e38c0b1d014578c61b95eeb1c9e3f61 100644 (file)
@@ -10,7 +10,7 @@ from ..utils import (
 
 
 class UbuIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
     _TEST = {
         'url': 'http://ubu.com/film/her_noise.html',
         'md5': '138d5652618bf0f03878978db9bef1ee',
index 594bee4f9a681f928f37887270b062b6e7079514..66d9f1bf3fc9ff8481fb55aa8045078244b11635 100644 (file)
@@ -7,7 +7,7 @@ from ..utils import qualities
 
 
 class UnistraIE(InfoExtractor):
-    _VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
+    _VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
 
     _TESTS = [
         {
index 3794bcded273235f3fc2a77097b1ca6e48fab348..b755dda902f3370de27a8f496e7cc4b3b8b76a31 100644 (file)
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class Vbox7IE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'
     _TEST = {
         'url': 'http://vbox7.com/play:249bb972c2',
         'md5': '99f65c0c9ef9b682b97313e052734c3f',
index 9633f7ffeec865c69c77a0e2d7475399a998d44a..23ce0a0d1929febac87f789374d8411d7b7ddd00 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class VeohIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'
+    _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'
 
     _TESTS = [
         {
index a0c59a2e0e1cb8fca2e0e3eb3ec2e4edce2918bb..cb64ae0bd07cdca051eb3aa10550840a296ded85 100644 (file)
@@ -10,7 +10,7 @@ from .rutv import RUTVIE
 
 class VestiIE(InfoExtractor):
     IE_DESC = 'Вести.Ru'
-    _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
 
     _TESTS = [
         {
index e148b1ef513321376efe1795056503ea2a8bcad8..b11cd254c7da9c8c780dedd2b2db120f8025c74b 100644 (file)
@@ -214,7 +214,7 @@ class VGTVIE(XstreamIE):
 class BTArticleIE(InfoExtractor):
     IE_NAME = 'bt:article'
     IE_DESC = 'Bergens Tidende Articles'
-    _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
+    _VALID_URL = r'https?://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
     _TEST = {
         'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
         'md5': '2acbe8ad129b3469d5ae51b1158878df',
@@ -241,7 +241,7 @@ class BTArticleIE(InfoExtractor):
 class BTVestlendingenIE(InfoExtractor):
     IE_NAME = 'bt:vestlendingen'
     IE_DESC = 'Bergens Tidende - Vestlendingen'
-    _VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
         'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
index 2cd36508a2dfef07a963109327349e7c453ab298..0f798711bca7ebc25c893f82746ed3b1a49ff778 100644 (file)
@@ -14,7 +14,7 @@ class VideoTtIE(InfoExtractor):
     _WORKING = False
     ID_NAME = 'video.tt'
     IE_DESC = 'video.tt - Your True Tube'
-    _VALID_URL = r'http://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
+    _VALID_URL = r'https?://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
 
     _TESTS = [{
         'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8',
index 315984bf9dbd298304d4211c3793f3aaf5d267e2..03b9f13534a880f3d6cfa2a0780ed8dfa2c07ad7 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class ViideaIE(InfoExtractor):
-    _VALID_URL = r'''(?x)http://(?:www\.)?(?:
+    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
             videolectures\.net|
             flexilearn\.viidea\.net|
             presentations\.ocwconsortium\.org|
index 9e2aa58bd94270aee79800ec2d4d4ac18bad28e8..bd55451732cea69570753cb7064066537f5600ae 100644 (file)
@@ -64,7 +64,7 @@ class VLiveIE(InfoExtractor):
 
         thumbnail = self._og_search_thumbnail(webpage)
         creator = self._html_search_regex(
-            r'<div[^>]+class="info_area"[^>]*>\s*<strong[^>]+class="name"[^>]*>([^<]+)</strong>',
+            r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
             webpage, 'creator', fatal=False)
 
         view_count = int_or_none(playinfo.get('meta', {}).get('count'))
index 149e364677fcab4d0374479c4b96ff741277b17e..10ca6acb12469f85267405f9431b9508c0537e57 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 class VubeIE(InfoExtractor):
     IE_NAME = 'vube'
     IE_DESC = 'Vube.com'
-    _VALID_URL = r'http://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b'
+    _VALID_URL = r'https?://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b'
 
     _TESTS = [
         {
index a6d9b5fee1f4864d82c7f8bb83e87884c96afe3b..eaa888f005cc61c53b8f45c3e3b93633083b17ed 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class VuClipIE(InfoExtractor):
-    _VALID_URL = r'http://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
 
     _TEST = {
         'url': 'http://m.vuclip.com/w?cid=922692425&fid=70295&z=1010&nvar&frm=index.html',
index 24efbd6e6341ba5aa73e5df11cb9af36f941da43..8b9488340368ea0292fa2614336778099c9eb11e 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class WallaIE(InfoExtractor):
-    _VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
+    _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
     _TEST = {
         'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
         'info_dict': {
index 37cf3d3097c94b39f1b66ab11e0e651579f8d533..5227bb5ad9a2cd4f71c156cd8ca9bb3f5fbd5d17 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class WatIE(InfoExtractor):
-    _VALID_URL = r'(?:wat:(?P<real_id>\d{8})|http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)'
+    _VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)'
     IE_NAME = 'wat.tv'
     _TESTS = [
         {
index a851578e075589e6b36d828135f9919a9f1d0c38..31c90430327da895ffc974c1d489cb4c92689d2f 100644 (file)
@@ -244,7 +244,7 @@ class WDRMobileIE(InfoExtractor):
 
 
 class WDRMausIE(InfoExtractor):
-    _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
+    _VALID_URL = r'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
     IE_DESC = 'Sendung mit der Maus'
     _TESTS = [{
         'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
index e333ae345b16e6ff6a25d2f57559c970eaf601d9..3dafbeec2c5f7ba0b2e18ec621c67966214d3307 100644 (file)
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 class WeiqiTVIE(InfoExtractor):
     IE_DESC = 'WQTV'
-    _VALID_URL = r'http://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)'
+    _VALID_URL = r'https?://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)'
 
     _TESTS = [{
         'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3',
index fb0accac744532625c04bb964c1fa031723ed8ff..828c03dc38c4d4d4668f6dfb66e4cc29c51fd7e5 100644 (file)
@@ -5,7 +5,7 @@ from .youtube import YoutubeIE
 
 
 class WimpIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.wimp.com/maruexhausted/',
         'md5': 'ee21217ffd66d058e8b16be340b74883',
index 4ff99e5ca37fb8f4f0b663cc99761c31e75f1cf4..e4a2baad22534d772a90b8ec5832c11833f10281 100644 (file)
@@ -5,7 +5,7 @@ from ..compat import compat_urllib_parse_unquote
 
 
 class XBefIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?xbef\.com/video/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://xbef.com/video/5119-glamourous-lesbians-smoking-drinking-and-fucking',
         'md5': 'a478b565baff61634a98f5e5338be995',
index fd43e8854c994e5b15c661ce49636853c41d2ecd..b3547174dd92beffafaf8f220b50b94a25f2fa2b 100644 (file)
@@ -4,6 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    dict_get,
     float_or_none,
     int_or_none,
     unified_strdate,
@@ -170,6 +171,12 @@ class XHamsterEmbedIE(InfoExtractor):
 
         video_url = self._search_regex(
             r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
-            webpage, 'xhamster url')
+            webpage, 'xhamster url', default=None)
+
+        if not video_url:
+            vars = self._parse_json(
+                self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'),
+                video_id)
+            video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
 
         return self.url_result(video_url, 'XHamster')
index 001ee17b6f93d457bdc2fbdaf802b61ef19e1b41..63bbc06346a04b385c722eaae22d0ff5c41445f4 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 class YamIE(InfoExtractor):
     IE_DESC = '蕃薯藤yam天空部落'
-    _VALID_URL = r'http://mymedia.yam.com/m/(?P<id>\d+)'
+    _VALID_URL = r'https?://mymedia.yam.com/m/(?P<id>\d+)'
 
     _TESTS = [{
         # An audio hosted on Yam
index 869f3e8190ca0b751366a85f142a0b49fe294fa1..2522551dc061b6652d143f1e4d09ee2428b827dc 100644 (file)
@@ -9,7 +9,7 @@ from ..compat import compat_urllib_parse_unquote_plus
 
 
 class YnetIE(InfoExtractor):
-    _VALID_URL = r'http://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'
+    _VALID_URL = r'https?://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'
     _TESTS = [
         {
             'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html',
index 822728afc5b7a1bbae9e97435a0fffb0dd1d25e9..7819f14ab0b36786e06a432f15cb9e0429320288 100644 (file)
@@ -724,7 +724,7 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '--embed-subs',
         action='store_true', dest='embedsubtitles', default=False,
-        help='Embed subtitles in the video (only for mkv and mp4 videos)')
+        help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
     postproc.add_option(
         '--embed-thumbnail',
         action='store_true', dest='embedthumbnail', default=False,
index a8819f258013de5a1cbbf1a5b42ca87b2b4ef14e..06b8c05482013c1c521267c7d4406c0124f0c972 100644 (file)
@@ -331,17 +331,34 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
 
 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
     def run(self, information):
-        if information['ext'] not in ['mp4', 'mkv']:
-            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files')
+        if information['ext'] not in ('mp4', 'webm', 'mkv'):
+            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files')
             return [], information
         subtitles = information.get('requested_subtitles')
         if not subtitles:
             self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
             return [], information
 
-        sub_langs = list(subtitles.keys())
         filename = information['filepath']
-        sub_filenames = [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
+
+        ext = information['ext']
+        sub_langs = []
+        sub_filenames = []
+        webm_vtt_warn = False
+
+        for lang, sub_info in subtitles.items():
+            sub_ext = sub_info['ext']
+            if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
+                sub_langs.append(lang)
+                sub_filenames.append(subtitles_filename(filename, lang, sub_ext))
+            else:
+                if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
+                    webm_vtt_warn = True
+                    self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')
+
+        if not sub_langs:
+            return [], information
+
         input_files = [filename] + sub_filenames
 
         opts = [
index 067b8a184c67ea152e3769bcdbcc63bf43afca77..03bb7782f492daa247c6ababba6c975d5548d75e 100644 (file)
@@ -1746,6 +1746,7 @@ def escape_url(url):
     """Escape URL as suggested by RFC 3986"""
     url_parsed = compat_urllib_parse_urlparse(url)
     return url_parsed._replace(
+        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
         path=escape_rfc3986(url_parsed.path),
         params=escape_rfc3986(url_parsed.params),
         query=escape_rfc3986(url_parsed.query),