Merge branch 'master' of github.com-rndusr:rg3/youtube-dl into fix/str-item-assignment
authorRandom User <rndusr@posteo.de>
Sat, 25 Mar 2017 20:36:59 +0000 (21:36 +0100)
committerRandom User <rndusr@posteo.de>
Sat, 25 Mar 2017 20:36:59 +0000 (21:36 +0100)
76 files changed:
.github/ISSUE_TEMPLATE.md
AUTHORS
ChangeLog
README.md
docs/supportedsites.md
test/test_InfoExtractor.py
test/test_compat.py
test/test_download.py
test/test_utils.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/cache.py
youtube_dl/downloader/external.py
youtube_dl/downloader/hls.py
youtube_dl/extractor/addanime.py
youtube_dl/extractor/adobepass.py
youtube_dl/extractor/afreecatv.py
youtube_dl/extractor/arkena.py
youtube_dl/extractor/atresplayer.py
youtube_dl/extractor/atvat.py [new file with mode: 0644]
youtube_dl/extractor/bellmedia.py
youtube_dl/extractor/bostonglobe.py [new file with mode: 0644]
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/channel9.py
youtube_dl/extractor/cloudy.py
youtube_dl/extractor/common.py
youtube_dl/extractor/condenast.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/discoverygo.py
youtube_dl/extractor/discoverynetworks.py [new file with mode: 0644]
youtube_dl/extractor/douyutv.py
youtube_dl/extractor/dplay.py
youtube_dl/extractor/drtv.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/eyedotv.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/fox.py
youtube_dl/extractor/franceculture.py
youtube_dl/extractor/freshlive.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/go.py
youtube_dl/extractor/hbo.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/medialaan.py [new file with mode: 0644]
youtube_dl/extractor/miomio.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/ninecninemedia.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/openload.py
youtube_dl/extractor/pluralsight.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/redbulltv.py [new file with mode: 0644]
youtube_dl/extractor/rutube.py
youtube_dl/extractor/ruutu.py
youtube_dl/extractor/senateisvp.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/streamable.py
youtube_dl/extractor/telecinco.py
youtube_dl/extractor/telequebec.py
youtube_dl/extractor/tlc.py [deleted file]
youtube_dl/extractor/toongoggles.py [new file with mode: 0644]
youtube_dl/extractor/tunepk.py [new file with mode: 0644]
youtube_dl/extractor/twentyfourvideo.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/vier.py
youtube_dl/extractor/viu.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/vrak.py [new file with mode: 0644]
youtube_dl/extractor/wdr.py
youtube_dl/extractor/youtube.py
youtube_dl/options.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

index 6374f7c255f182cdd0435dcfe4a418ae29298b52..dfff41d2d00b6d68152615cc1b93f4a4486f3f87 100644 (file)
@@ -6,8 +6,8 @@
 
 ---
 
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.27**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.24**
 
 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.02.27
+[debug] youtube-dl version 2017.03.24
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
diff --git a/AUTHORS b/AUTHORS
index 247c0ea13f82ca1783f717378d1bfecccd3f243f..2d676b210827d6950ddc20679165bdf252935764 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -202,3 +202,11 @@ Fabian Stahl
 Bagira
 Odd Stråbø
 Philip Herzog
+Thomas Christlieb
+Marek Rusinowski
+Tobias Gruetzmacher
+Olivier Bilodeau
+Lars Vierbergen
+Juanjo Benages
+Xiao Di Guan
+Thomas Winant
index 401c5885ea877a52162d3cbfc5661105592e4237..45d6f244dd189e5368de8d76b9efd7ecc1a2e75e 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,149 @@
 version <unreleased>
 
 Extractors
-+ [daisuki] Add new extractor (#2486, #3186, #4738, #6175, #7776, #10060)
+* [afreecatv] Fix extraction (#12179)
+
+
+version 2017.03.24
+
+Extractors
+- [9c9media] Remove mp4 URL extraction request
++ [bellmedia] Add support for etalk.ca and space.ca (#12447)
+* [channel9] Fix extraction (#11323)
+* [cloudy] Fix extraction (#12525)
++ [hbo] Add support for free episode URLs and new formats extraction (#12519)
+* [condenast] Fix extraction and style (#12526)
+* [viu] Relax URL regular expression (#12529)
+
+
+version 2017.03.22
+
+Extractors
+- [pluralsight] Omit module title from video title (#12506)
+* [pornhub] Decode obfuscated video URL (#12470, #12515)
+* [senateisvp] Allow https URL scheme for embeds (#12512)
+
+
+version 2017.03.20
+
+Core
++ [YoutubeDL] Allow multiple input URLs to be used with stdout (-) as
+  output template
++ [adobepass] Detect and output error on authz token extraction (#12472)
+
+Extractors
++ [bostonglobe] Add extractor for bostonglobe.com (#12099)
++ [toongoggles] Add support for toongoggles.com (#12171)
++ [medialaan] Add support for Medialaan sites (#9974, #11912)
++ [discoverynetworks] Add support for more domains and bypass geo restriction
+* [openload] Fix extraction (#10408)
+
+
+version 2017.03.16
+
+Core
++ [postprocessor/ffmpeg] Add support for flac
++ [extractor/common] Extract SMIL formats from jwplayer
+
+Extractors
++ [generic] Add forgotten return for jwplayer formats
+* [redbulltv] Improve extraction
+
+
+version 2017.03.15
+
+Core
+* Fix missing subtitles if --add-metadata is used (#12423)
+
+Extractors
+* [facebook] Make title optional (#12443)
++ [mitele] Add support for ooyala videos (#12430)
+* [openload] Fix extraction (#12435, #12446)
+* [streamable] Update API URL (#12433)
++ [crunchyroll] Extract season name (#12428)
+* [discoverygo] Bypass geo restriction
++ [discoverygo:playlist] Add support for playlists (#12424)
+
+
+version 2017.03.10
+
+Extractors
+* [generic] Make title optional for jwplayer embeds (#12410)
+* [wdr:maus] Fix extraction (#12373)
+* [prosiebensat1] Improve title extraction (#12318, #12327)
+* [dplayit] Separate and rewrite extractor and bypass geo restriction (#12393)
+* [miomio] Fix extraction (#12291, #12388, #12402)
+* [telequebec] Fix description extraction (#12399)
+* [openload] Fix extraction (#12357)
+* [brightcove:legacy] Relax videoPlayer validation check (#12381)
+
+
+version 2017.03.07
+
+Core
+* Metadata are now added after conversion (#5594)
+
+Extractors
+* [soundcloud] Update client id (#12376)
+* [openload] Fix extraction (#10408, #12357)
+
+
+version 2017.03.06
+
+Core
++ [utils] Process bytestrings in urljoin (#12369)
+* [extractor/common] Improve height extraction and extract bitrate
+* [extractor/common] Move jwplayer formats extraction in separate method
++ [external:ffmpeg] Limit test download size to 10KiB (#12362)
+
+Extractors
++ [drtv] Add geo countries to GeoRestrictedError
++ [drtv:live] Bypass geo restriction
++ [tunepk] Add extractor (#12197, #12243)
+
+
+version 2017.03.05
+
+Extractors
++ [twitch] Add basic support for two-factor authentication (#11974)
++ [vier] Add support for vijf.be (#12304)
++ [redbulltv] Add support for redbull.tv (#3919, #11948)
+* [douyutv] Switch to the PC API to escape the 5-min limitation (#12316)
++ [generic] Add support for rutube embeds
++ [rutube] Relax URL regular expression
++ [vrak] Add support for vrak.tv (#11452)
++ [brightcove:new] Add ability to smuggle geo_countries into URL
++ [brightcove:new] Raise GeoRestrictedError
+* [go] Relax URL regular expression (#12341)
+* [24video] Use original host for requests (#12339)
+* [ruutu] Disable DASH formats (#12322)
+
+
+version 2017.03.02
+
+Core
++ [adobepass] Add support for Charter Spectrum (#11465)
+* [YoutubeDL] Don't sanitize identifiers in output template (#12317)
+
+Extractors
+* [facebook] Fix extraction (#12323, #12330)
+* [youtube] Mark errors about rental videos as expected (#12324)
++ [npo] Add support for audio
+* [npo] Adapt to app.php API (#12311, #12320)
+
+
+version 2017.02.28
+
+Core
++ [utils] Add bytes_to_long and long_to_bytes
++ [utils] Add pkcs1pad
++ [aes] Add aes_cbc_encrypt
+
+Extractors
++ [azmedien:showplaylist] Add support for show playlists (#12160)
++ [youtube:playlist] Recognize another playlist pattern (#11928, #12286)
++ [daisuki] Add support for daisuki.net (#2486, #3186, #4738, #6175, #7776,
+  #10060)
 * [douyu] Fix extraction (#12301)
 
 
index 0fc5984dc4781ae46b35afdd1d7c8e4426562274..86b44781ca21b2eb3c724eb6cfe0f3f1a022ce6b 100644 (file)
--- a/README.md
+++ b/README.md
@@ -375,8 +375,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
                                      (requires ffmpeg or avconv and ffprobe or
                                      avprobe)
     --audio-format FORMAT            Specify audio format: "best", "aac",
-                                     "vorbis", "mp3", "m4a", "opus", or "wav";
-                                     "best" by default; No effect without -x
+                                     "flac", "mp3", "m4a", "opus", "vorbis", or
+                                     "wav"; "best" by default; No effect without
+                                     -x
     --audio-quality QUALITY          Specify ffmpeg/avconv audio quality, insert
                                      a value between 0 (better) and 9 (worse)
                                      for VBR or a specific bitrate like 128K
index 1b01c6d9d4648c175acf187ceebca7f0893e83f2..7c99ba3c20418dd29d17fa35963df8595a245091 100644 (file)
@@ -78,6 +78,7 @@
  - **awaan:video**
  - **AZMedien**: AZ Medien videos
  - **AZMedienPlaylist**: AZ Medien playlists
+ - **AZMedienShowPlaylist**: AZ Medien show playlists
  - **Azubu**
  - **AzubuLive**
  - **BaiduVideo**: 百度视频
  - **blinkx**
  - **Bloomberg**
  - **BokeCC**
+ - **BostonGlobe**
  - **Bpb**: Bundeszentrale für politische Bildung
  - **BR**: Bayerischer Rundfunk Mediathek
  - **BravoTV**
  - **dailymotion:playlist**
  - **dailymotion:user**
  - **DailymotionCloud**
+ - **Daisuki**
+ - **DaisukiPlaylist**
  - **daum.net**
  - **daum.net:clip**
  - **daum.net:playlist**
  - **Digiteka**
  - **Discovery**
  - **DiscoveryGo**
+ - **DiscoveryGoPlaylist**
+ - **DiscoveryNetworksDe**
  - **Disney**
  - **Dotsub**
  - **DouyuTV**: 斗鱼
  - **DPlay**
+ - **DPlayIt**
  - **dramafever**
  - **dramafever:series**
  - **DRBonanza**
  - **GPUTechConf**
  - **Groupon**
  - **Hark**
- - **HBO**
- - **HBOEpisode**
+ - **hbo**
+ - **hbo:episode**
  - **HearThisAt**
  - **Heise**
  - **HellPorno**
  - **MatchTV**
  - **MDR**: MDR.DE and KiKA
  - **media.ccc.de**
+ - **Medialaan**
  - **Meipai**: 美拍
  - **MelonVOD**
  - **META**
  - **RaiTV**
  - **RBMARadio**
  - **RDS**: RDS.ca
+ - **RedBullTV**
  - **RedTube**
  - **RegioTV**
  - **RENTV**
  - **ThisAV**
  - **ThisOldHouse**
  - **tinypic**: tinypic.com videos
- - **tlc.de**
  - **TMZ**
  - **TMZArticle**
  - **TNAFlix**
  - **TNAFlixNetworkEmbed**
  - **toggle**
+ - **ToonGoggles**
  - **Tosh**: Tosh.0
  - **tou.tv**
  - **Toypics**: Toypics user profile
  - **tunein:program**
  - **tunein:station**
  - **tunein:topic**
+ - **TunePk**
  - **Turbo**
  - **Tutv**
  - **tv.dfb.de**
  - **VoxMedia**
  - **Vporn**
  - **vpro**: npo.nl and ntr.nl
+ - **Vrak**
  - **VRT**
  - **vube**: Vube.com
  - **VuClip**
index 437c7270ee6aeaa8eba588badfb3bf26d79ea37d..881197afbe60a5a2c190e81a6a6bd706459f6367 100644 (file)
@@ -8,7 +8,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import FakeYDL
+from test.helper import FakeYDL, expect_dict
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
 from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
@@ -84,6 +84,97 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
         self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
 
+    def test_extract_jwplayer_data_realworld(self):
+        # from http://www.suffolk.edu/sjc/
+        expect_dict(
+            self,
+            self.ie._extract_jwplayer_data(r'''
+                <script type='text/javascript'>
+                    jwplayer('my-video').setup({
+                        file: 'rtmp://192.138.214.154/live/sjclive',
+                        fallback: 'true',
+                        width: '95%',
+                      aspectratio: '16:9',
+                      primary: 'flash',
+                      mediaid:'XEgvuql4'
+                    });
+                </script>
+                ''', None, require_title=False),
+            {
+                'id': 'XEgvuql4',
+                'formats': [{
+                    'url': 'rtmp://192.138.214.154/live/sjclive',
+                    'ext': 'flv'
+                }]
+            })
+
+        # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/
+        expect_dict(
+            self,
+            self.ie._extract_jwplayer_data(r'''
+<script type="text/javascript">
+    jwplayer("mediaplayer").setup({
+        'videoid': "7564",
+        'width': "100%",
+        'aspectratio': "16:9",
+        'stretching': "exactfit",
+        'autostart': 'false',
+        'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf",
+        'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv",
+        'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg",
+        'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4",
+        'logo.hide': true,
+        'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip",
+        'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf",
+        'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370",
+        'controlbar': 'bottom',
+        'modes': [
+            {type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'}
+        ],
+        'provider': 'http'
+    });
+    //noinspection JSAnnotator
+    invideo.setup({
+        adsUrl: "/banner-iframe/?zoneId=32",
+        adsUrl2: "",
+        autostart: false
+    });
+</script>
+            ''', 'dummy', require_title=False),
+            {
+                'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg',
+                'formats': [{
+                    'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv',
+                    'ext': 'flv'
+                }]
+            })
+
+        # from http://www.indiedb.com/games/king-machine/videos
+        expect_dict(
+            self,
+            self.ie._extract_jwplayer_data(r'''
+<script>
+jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king machine trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) {
+            videoAnalytics("play");
+}).once("complete", function(event) {
+    videoAnalytics("completed");
+});
+</script>
+                ''', 'dummy'),
+            {
+                'title': 'king machine trailer 1',
+                'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg',
+                'formats': [{
+                    'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4',
+                    'height': 360,
+                    'ext': 'mp4'
+                }, {
+                    'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4',
+                    'height': 720,
+                    'ext': 'mp4'
+                }]
+            })
+
 
 if __name__ == '__main__':
     unittest.main()
index b574249489a3ded4cf5dcd66e49c123829c2331e..d6c54e135810f9d03a970e3d261d64c70bf19530 100644 (file)
@@ -27,11 +27,11 @@ from youtube_dl.compat import (
 class TestCompat(unittest.TestCase):
     def test_compat_getenv(self):
         test_str = 'тест'
-        compat_setenv('YOUTUBE-DL-TEST', test_str)
-        self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
+        compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str)
+        self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str)
 
     def test_compat_setenv(self):
-        test_var = 'YOUTUBE-DL-TEST'
+        test_var = 'YOUTUBE_DL_COMPAT_SETENV'
         test_str = 'тест'
         compat_setenv(test_var, test_str)
         compat_getenv(test_var)
index 30034f9782410b1f0e9300916fb101e02f42050c..01a8bcb89814a5fd450d0d9c1a7959224e37bbcb 100644 (file)
@@ -71,6 +71,18 @@ class TestDownload(unittest.TestCase):
 
     maxDiff = None
 
+    def __str__(self):
+        """Identify each test with the `add_ie` attribute, if available."""
+
+        def strclass(cls):
+            """From 2.7's unittest; 2.6 had _strclass so we can't import it."""
+            return '%s.%s' % (cls.__module__, cls.__name__)
+
+        add_ie = getattr(self, self._testMethodName).add_ie
+        return '%s (%s)%s:' % (self._testMethodName,
+                               strclass(self.__class__),
+                               ' [%s]' % add_ie if add_ie else '')
+
     def setUp(self):
         self.defs = defs
 
@@ -233,6 +245,8 @@ for n, test_case in enumerate(defs):
         i += 1
     test_method = generator(test_case, tname)
     test_method.__name__ = str(tname)
+    ie_list = test_case.get('add_ie')
+    test_method.add_ie = ie_list and ','.join(ie_list)
     setattr(TestDownload, test_method.__name__, test_method)
     del test_method
 
index aefd94518c37e8cf1d33f788bbeefbd9470c5f1d..aa4569b819e368381a9ee100a6060e38ae0ce065 100644 (file)
@@ -56,6 +56,7 @@ from youtube_dl.utils import (
     read_batch_urls,
     sanitize_filename,
     sanitize_path,
+    expand_path,
     prepend_extension,
     replace_extension,
     remove_start,
@@ -95,6 +96,8 @@ from youtube_dl.utils import (
 from youtube_dl.compat import (
     compat_chr,
     compat_etree_fromstring,
+    compat_getenv,
+    compat_setenv,
     compat_urlparse,
     compat_parse_qs,
 )
@@ -214,6 +217,18 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_path('./abc'), 'abc')
         self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 
+    def test_expand_path(self):
+        def env(var):
+            return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
+
+        compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded')
+        self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded')
+        self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
+        self.assertEqual(expand_path('~'), compat_getenv('HOME'))
+        self.assertEqual(
+            expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
+            '%s/expanded' % compat_getenv('HOME'))
+
     def test_prepend_extension(self):
         self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
         self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
@@ -455,6 +470,9 @@ class TestUtil(unittest.TestCase):
 
     def test_urljoin(self):
         self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
+        self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
         self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
         self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
         self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
index f7254560c04c87549cd65488408ce3ddfcd4bf5f..21586f0f4abe821c4b8ed5f199cb8e807be4db42 100755 (executable)
@@ -29,7 +29,6 @@ import random
 from .compat import (
     compat_basestring,
     compat_cookiejar,
-    compat_expanduser,
     compat_get_terminal_size,
     compat_http_client,
     compat_kwargs,
@@ -54,6 +53,7 @@ from .utils import (
     encode_compat_str,
     encodeFilename,
     error_to_compat_str,
+    expand_path,
     ExtractorError,
     format_bytes,
     formatSeconds,
@@ -616,7 +616,7 @@ class YoutubeDL(object):
             sanitize = lambda k, v: sanitize_filename(
                 compat_str(v),
                 restricted=self.params.get('restrictfilenames'),
-                is_id=(k == 'id'))
+                is_id=(k == 'id' or k.endswith('_id')))
             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                  for k, v in template_dict.items()
                                  if v is not None and not isinstance(v, (list, tuple, dict)))
@@ -672,7 +672,7 @@ class YoutubeDL(object):
                         FORMAT_RE.format(numeric_field),
                         r'%({0})s'.format(numeric_field), outtmpl)
 
-            tmpl = compat_expanduser(outtmpl)
+            tmpl = expand_path(outtmpl)
             filename = tmpl % template_dict
             # Temporary fix for #4787
             # 'Treat' all problem characters by passing filename through preferredencoding
@@ -1872,6 +1872,7 @@ class YoutubeDL(object):
         """Download a given list of URLs."""
         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
         if (len(url_list) > 1 and
+                outtmpl != '-' and
                 '%' not in outtmpl and
                 self.params.get('max_downloads') != 1):
             raise SameFileError(outtmpl)
@@ -2169,7 +2170,7 @@ class YoutubeDL(object):
         if opts_cookiefile is None:
             self.cookiejar = compat_cookiejar.CookieJar()
         else:
-            opts_cookiefile = compat_expanduser(opts_cookiefile)
+            opts_cookiefile = expand_path(opts_cookiefile)
             self.cookiejar = compat_cookiejar.MozillaCookieJar(
                 opts_cookiefile)
             if os.access(opts_cookiefile, os.R_OK):
index 0c401baa6640fc7aaef83cd4ca11a1a4462919b1..f15606568d062b6981076e8739d14037e2d5a5eb 100644 (file)
@@ -16,7 +16,6 @@ from .options import (
     parseOpts,
 )
 from .compat import (
-    compat_expanduser,
     compat_getpass,
     compat_shlex_split,
     workaround_optparse_bug9161,
@@ -26,6 +25,7 @@ from .utils import (
     decodeOption,
     DEFAULT_OUTTMPL,
     DownloadError,
+    expand_path,
     match_filter_func,
     MaxDownloadsReached,
     preferredencoding,
@@ -88,7 +88,7 @@ def _real_main(argv=None):
                 batchfd = sys.stdin
             else:
                 batchfd = io.open(
-                    compat_expanduser(opts.batchfile),
+                    expand_path(opts.batchfile),
                     'r', encoding='utf-8', errors='ignore')
             batch_urls = read_batch_urls(batchfd)
             if opts.verbose:
@@ -196,7 +196,7 @@ def _real_main(argv=None):
     if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
         raise ValueError('Playlist end must be greater than playlist start')
     if opts.extractaudio:
-        if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
+        if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
             parser.error('invalid audio format specified')
     if opts.audioquality:
         opts.audioquality = opts.audioquality.strip('k').strip('K')
@@ -238,18 +238,15 @@ def _real_main(argv=None):
 
     any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
     any_printing = opts.print_json
-    download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
+    download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
 
     # PostProcessors
     postprocessors = []
-    # Add the metadata pp first, the other pps will copy it
     if opts.metafromtitle:
         postprocessors.append({
             'key': 'MetadataFromTitle',
             'titleformat': opts.metafromtitle
         })
-    if opts.addmetadata:
-        postprocessors.append({'key': 'FFmpegMetadata'})
     if opts.extractaudio:
         postprocessors.append({
             'key': 'FFmpegExtractAudio',
@@ -262,6 +259,16 @@ def _real_main(argv=None):
             'key': 'FFmpegVideoConvertor',
             'preferedformat': opts.recodevideo,
         })
+    # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
+    # FFmpegExtractAudioPP as containers before conversion may not support
+    # metadata (3gp, webm, etc.)
+    # And this post-processor should be placed before other metadata
+    # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
+    # extra metadata. By default ffmpeg preserves metadata applicable for both
+    # source and target containers. From this point the container won't change,
+    # so metadata can be added here.
+    if opts.addmetadata:
+        postprocessors.append({'key': 'FFmpegMetadata'})
     if opts.convertsubtitles:
         postprocessors.append({
             'key': 'FFmpegSubtitlesConvertor',
@@ -442,7 +449,7 @@ def _real_main(argv=None):
 
         try:
             if opts.load_info_filename is not None:
-                retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
+                retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
             else:
                 retcode = ydl.download(all_urls)
         except MaxDownloadsReached:
index 5fe839eb1269586db95b9e25b4c8dabcdcd965d9..7bdade1bdb49a7406457688400830a91a98ef186 100644 (file)
@@ -8,8 +8,11 @@ import re
 import shutil
 import traceback
 
-from .compat import compat_expanduser, compat_getenv
-from .utils import write_json_file
+from .compat import compat_getenv
+from .utils import (
+    expand_path,
+    write_json_file,
+)
 
 
 class Cache(object):
@@ -21,7 +24,7 @@ class Cache(object):
         if res is None:
             cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
             res = os.path.join(cache_root, 'youtube-dl')
-        return compat_expanduser(res)
+        return expand_path(res)
 
     def _get_cache_fn(self, section, key, dtype):
         assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
index bdd3545a2f17a731f2b9326be9de68034b4fb86e..e13cf547d10cbf472440c9f23d010a586b2c453c 100644 (file)
@@ -6,7 +6,10 @@ import sys
 import re
 
 from .common import FileDownloader
-from ..compat import compat_setenv
+from ..compat import (
+    compat_setenv,
+    compat_str,
+)
 from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
 from ..utils import (
     cli_option,
@@ -270,6 +273,10 @@ class FFmpegFD(ExternalFD):
                 args += ['-rtmp_live', 'live']
 
         args += ['-i', url, '-c', 'copy']
+
+        if self.params.get('test', False):
+            args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
+
         if protocol in ('m3u8', 'm3u8_native'):
             if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
                 args += ['-f', 'mpegts']
index 4989abce12ee236e5c528778e5b95f67d92e165e..7534e4da5e3dbea6d3a304b9abcaf62e223b4c30 100644 (file)
@@ -30,6 +30,15 @@ class HlsFD(FragmentFD):
 
     FD_NAME = 'hlsnative'
 
+    def _delegate_to_ffmpeg(self, filename, info_dict):
+        self.report_warning(
+            'hlsnative has detected features it does not support, '
+            'extraction will be delegated to ffmpeg')
+        fd = FFmpegFD(self.ydl, self.params)
+        for ph in self._progress_hooks:
+            fd.add_progress_hook(ph)
+        return fd.real_download(filename, info_dict)
+
     @staticmethod
     def can_download(manifest, info_dict):
         UNSUPPORTED_FEATURES = (
@@ -53,10 +62,12 @@ class HlsFD(FragmentFD):
         )
         check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
         check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
-        check_results.append(not info_dict.get('is_live'))
         return all(check_results)
 
     def real_download(self, filename, info_dict):
+        if info_dict.get('is_live'):
+            return self._delegate_to_ffmpeg(filename, info_dict)
+
         man_url = info_dict['url']
         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
 
@@ -68,13 +79,7 @@ class HlsFD(FragmentFD):
             if info_dict.get('extra_param_to_segment_url'):
                 self.report_error('pycrypto not found. Please install it.')
                 return False
-            self.report_warning(
-                'hlsnative has detected features it does not support, '
-                'extraction will be delegated to ffmpeg')
-            fd = FFmpegFD(self.ydl, self.params)
-            for ph in self._progress_hooks:
-                fd.add_progress_hook(ph)
-            return fd.real_download(filename, info_dict)
+            return self._delegate_to_ffmpeg(filename, info_dict)
 
         total_frags = 0
         for line in s.splitlines():
index 55a9322a753829e90715a76bc91e06828c460531..9f8a71262883f21f9180211e3639991030e47d38 100644 (file)
@@ -25,7 +25,8 @@ class AddAnimeIE(InfoExtractor):
             'ext': 'mp4',
             'description': 'One Piece 606',
             'title': 'One Piece 606',
-        }
+        },
+        'skip': 'Video is gone',
     }, {
         'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
         'only_matching': True,
index 4d655bd5e1c3a6c4afae98711c3b7a80c5ac58fc..1b2d364cab452da835d7ba8cc78325825c0fcf20 100644 (file)
@@ -36,6 +36,11 @@ MSO_INFO = {
         'username_field': 'Ecom_User_ID',
         'password_field': 'Ecom_Password',
     },
+    'Charter_Direct': {
+        'name': 'Charter Spectrum',
+        'username_field': 'IDToken1',
+        'password_field': 'IDToken2',
+    },
     'thr030': {
         'name': '3 Rivers Communications'
     },
@@ -1453,6 +1458,8 @@ class AdobePassIE(InfoExtractor):
                     self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
                     count += 1
                     continue
+                if '<error' in authorize:
+                    raise ExtractorError(xml_text(authorize, 'details'), expected=True)
                 authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
                 requestor_info[guid] = authz_token
                 self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
index e0a0f7c57b83c7a715e7a39c16a8a71df1cbb500..b774d6db8954bf2b92800e14f7916d102b0bd8e8 100644 (file)
@@ -4,15 +4,10 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse_urlparse,
-    compat_urlparse,
-)
+from ..compat import compat_xpath
 from ..utils import (
     ExtractorError,
     int_or_none,
-    update_url_query,
-    xpath_element,
     xpath_text,
 )
 
@@ -43,7 +38,8 @@ class AfreecaTVIE(InfoExtractor):
             'uploader': 'dailyapril',
             'uploader_id': 'dailyapril',
             'upload_date': '20160503',
-        }
+        },
+        'skip': 'Video is gone',
     }, {
         'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
         'info_dict': {
@@ -71,6 +67,19 @@ class AfreecaTVIE(InfoExtractor):
                 'upload_date': '20160502',
             },
         }],
+        'skip': 'Video is gone',
+    }, {
+        'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
+        'info_dict': {
+            'id': '18650793',
+            'ext': 'flv',
+            'uploader': '윈아디',
+            'uploader_id': 'badkids',
+            'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
+        },
+        'params': {
+            'skip_download': True,  # requires rtmpdump
+        },
     }, {
         'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
         'only_matching': True,
@@ -90,40 +99,33 @@ class AfreecaTVIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        parsed_url = compat_urllib_parse_urlparse(url)
-        info_url = compat_urlparse.urlunparse(parsed_url._replace(
-            netloc='afbbs.afreecatv.com:8080',
-            path='/api/video/get_video_info.php'))
 
         video_xml = self._download_xml(
-            update_url_query(info_url, {'nTitleNo': video_id}), video_id)
+            'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
+            video_id, query={'nTitleNo': video_id})
 
-        if xpath_element(video_xml, './track/video/file') is None:
+        video_element = video_xml.findall(compat_xpath('./track/video'))[1]
+        if video_element is None or video_element.text is None:
             raise ExtractorError('Specified AfreecaTV video does not exist',
                                  expected=True)
 
-        title = xpath_text(video_xml, './track/title', 'title')
+        video_url_raw = video_element.text
+
+        app, playpath = video_url_raw.split('mp4:')
+
+        title = xpath_text(video_xml, './track/title', 'title', fatal=True)
         uploader = xpath_text(video_xml, './track/nickname', 'uploader')
         uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
         duration = int_or_none(xpath_text(video_xml, './track/duration',
                                           'duration'))
         thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
 
-        entries = []
-        for i, video_file in enumerate(video_xml.findall('./track/video/file')):
-            video_key = self.parse_video_key(video_file.get('key', ''))
-            if not video_key:
-                continue
-            entries.append({
-                'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
-                'title': title,
-                'upload_date': video_key.get('upload_date'),
-                'duration': int_or_none(video_file.get('duration')),
-                'url': video_file.text,
-            })
-
-        info = {
+        return {
             'id': video_id,
+            'url': app,
+            'ext': 'flv',
+            'play_path': 'mp4:' + playpath,
+            'rtmp_live': True,  # downloading won't end without this
             'title': title,
             'uploader': uploader,
             'uploader_id': uploader_id,
@@ -131,20 +133,6 @@ class AfreecaTVIE(InfoExtractor):
             'thumbnail': thumbnail,
         }
 
-        if len(entries) > 1:
-            info['_type'] = 'multi_video'
-            info['entries'] = entries
-        elif len(entries) == 1:
-            info['url'] = entries[0]['url']
-            info['upload_date'] = entries[0].get('upload_date')
-        else:
-            raise ExtractorError(
-                'No files found for the specified AfreecaTV video, either'
-                ' the URL is incorrect or the video has been made private.',
-                expected=True)
-
-        return info
-
 
 class AfreecaTVGlobalIE(AfreecaTVIE):
     IE_NAME = 'afreecatv:global'
index 50ffb442dd051be347e2c79c2d4a11dacb9f574b..4495ddbb079760bc3c35611b4126f6086a996a63 100644 (file)
@@ -93,8 +93,7 @@ class ArkenaIE(InfoExtractor):
                 exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
                 if kind == 'm3u8' or 'm3u8' in exts:
                     formats.extend(self._extract_m3u8_formats(
-                        f_url, video_id, 'mp4',
-                        entry_protocol='m3u8' if is_live else 'm3u8_native',
+                        f_url, video_id, 'mp4', 'm3u8_native',
                         m3u8_id=kind, fatal=False, live=is_live))
                 elif kind == 'flash' or 'f4m' in exts:
                     formats.extend(self._extract_f4m_formats(
index e3c669830343bb4f698dc342adebbd764877fd4b..99af6dc5ae5f7a59e306b9d351f82179ba599488 100644 (file)
@@ -90,7 +90,8 @@ class AtresPlayerIE(InfoExtractor):
             request, None, 'Logging in as %s' % username)
 
         error = self._html_search_regex(
-            r'(?s)<ul class="list_error">(.+?)</ul>', response, 'error', default=None)
+            r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
+            response, 'error', default=None)
         if error:
             raise ExtractorError(
                 'Unable to login: %s' % error, expected=True)
@@ -155,13 +156,17 @@ class AtresPlayerIE(InfoExtractor):
             if format_id == 'token' or not video_url.startswith('http'):
                 continue
             if 'geodeswowsmpra3player' in video_url:
-                f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
-                f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
+                f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
+                f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
                 # this videos are protected by DRM, the f4m downloader doesn't support them
                 continue
-            else:
-                f4m_url = video_url[:-9] + '/manifest.f4m'
-            formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
+            video_url_hd = video_url.replace('free_es', 'es')
+            formats.extend(self._extract_f4m_formats(
+                video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
+                fatal=False))
+            formats.extend(self._extract_mpd_formats(
+                video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
+                fatal=False))
         self._sort_formats(formats)
 
         path_data = player.get('pathData')
diff --git a/youtube_dl/extractor/atvat.py b/youtube_dl/extractor/atvat.py
new file mode 100644 (file)
index 0000000..1584d53
--- /dev/null
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    unescapeHTML,
+)
+
+
+class ATVAtIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
+    _TESTS = [{
+        'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
+        'md5': 'c3b6b975fb3150fc628572939df205f2',
+        'info_dict': {
+            'id': '1698447',
+            'ext': 'mp4',
+            'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
+        }
+    }, {
+        'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_data = self._parse_json(unescapeHTML(self._search_regex(
+            r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
+            webpage, 'player data')), display_id)['config']['initial_video']
+
+        video_id = video_data['id']
+        video_title = video_data['title']
+
+        parts = []
+        for part in video_data.get('parts', []):
+            part_id = part['id']
+            part_title = part['title']
+
+            formats = []
+            for source in part.get('sources', []):
+                source_url = source.get('src')
+                if not source_url:
+                    continue
+                ext = determine_ext(source_url)
+                if ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        source_url, part_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
+                else:
+                    formats.append({
+                        'format_id': source.get('delivery'),
+                        'url': source_url,
+                    })
+            self._sort_formats(formats)
+
+            parts.append({
+                'id': part_id,
+                'title': part_title,
+                'thumbnail': part.get('preview_image_url'),
+                'duration': int_or_none(part.get('duration')),
+                'is_live': part.get('is_livestream'),
+                'formats': formats,
+            })
+
+        return {
+            '_type': 'multi_video',
+            'id': video_id,
+            'title': video_title,
+            'entries': parts,
+        }
index 1f5b6ed92a2fa4c79b07361eb02797c118241832..8820a391468e7a6ecb168704eddf416fdc34ede4 100644 (file)
@@ -21,10 +21,11 @@ class BellMediaIE(InfoExtractor):
                 animalplanet|
                 bravo|
                 mtv|
-                space
+                space|
+                etalk
             )\.ca|
             much\.com
-        )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
+        )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
     _TESTS = [{
         'url': 'http://www.ctv.ca/video/player?vid=706966',
         'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
@@ -58,6 +59,9 @@ class BellMediaIE(InfoExtractor):
     }, {
         'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
         'only_matching': True,
+    }, {
+        'url': 'http://www.etalk.ca/video?videoid=663455',
+        'only_matching': True,
     }]
     _DOMAINS = {
         'thecomedynetwork': 'comedy',
@@ -65,6 +69,7 @@ class BellMediaIE(InfoExtractor):
         'sciencechannel': 'discsci',
         'investigationdiscovery': 'invdisc',
         'animalplanet': 'aniplan',
+        'etalk': 'ctv',
     }
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/bostonglobe.py b/youtube_dl/extractor/bostonglobe.py
new file mode 100644 (file)
index 0000000..57882fb
--- /dev/null
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+    extract_attributes,
+)
+
+
+class BostonGlobeIE(InfoExtractor):
+    _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'
+    _TESTS = [
+        {
+            'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html',
+            'md5': '0a62181079c85c2d2b618c9a738aedaf',
+            'info_dict': {
+                'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
+                'id': '5320421710001',
+                'ext': 'mp4',
+                'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
+                'timestamp': 1486877593,
+                'upload_date': '20170212',
+                'uploader_id': '245991542',
+            },
+        },
+        {
+            # Embedded youtube video; we hand it off to the Generic extractor.
+            'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html',
+            'md5': '582b40327089d5c0c949b3c54b13c24b',
+            'info_dict': {
+                'title': "Who Is Matt Damon's Favorite Batman?",
+                'id': 'ZW1QCnlA6Qc',
+                'ext': 'mp4',
+                'upload_date': '20170217',
+                'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb',
+                'uploader': 'The Late Late Show with James Corden',
+                'uploader_id': 'TheLateLateShow',
+            },
+            'expected_warnings': ['404'],
+        },
+    ]
+
+    def _real_extract(self, url):
+        page_id = self._match_id(url)
+        webpage = self._download_webpage(url, page_id)
+
+        page_title = self._og_search_title(webpage, default=None)
+
+        # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
+        entries = []
+        for video in re.findall(r'(?i)(<video[^>]+>)', webpage):
+            attrs = extract_attributes(video)
+
+            video_id = attrs.get('data-brightcove-video-id')
+            account_id = attrs.get('data-account')
+            player_id = attrs.get('data-player')
+            embed = attrs.get('data-embed')
+
+            if video_id and account_id and player_id and embed:
+                entries.append(
+                    'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+                    % (account_id, player_id, embed, video_id))
+
+        if len(entries) == 0:
+            return self.url_result(url, 'Generic')
+        elif len(entries) == 1:
+            return self.url_result(entries[0], 'BrightcoveNew')
+        else:
+            return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew')
index 27685eed0188312154463066293018496e875e5c..46ef8e605286ac0b079dfe30720de6a1ba9f48f5 100644 (file)
@@ -193,7 +193,13 @@ class BrightcoveLegacyIE(InfoExtractor):
         if videoPlayer is not None:
             if isinstance(videoPlayer, list):
                 videoPlayer = videoPlayer[0]
-            if not (videoPlayer.isdigit() or videoPlayer.startswith('ref:')):
+            videoPlayer = videoPlayer.strip()
+            # UUID is also possible for videoPlayer (e.g.
+            # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
+            # or http://www8.hp.com/cn/zh/home.html)
+            if not (re.match(
+                    r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
+                    videoPlayer) or videoPlayer.startswith('ref:')):
                 return None
             params['@videoPlayer'] = videoPlayer
         linkBase = find_param('linkBaseURL')
@@ -515,6 +521,9 @@ class BrightcoveNewIE(InfoExtractor):
         return entries
 
     def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
         account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
 
         webpage = self._download_webpage(
@@ -544,8 +553,10 @@ class BrightcoveNewIE(InfoExtractor):
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                 json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
-                raise ExtractorError(
-                    json_data.get('message') or json_data['error_code'], expected=True)
+                message = json_data.get('message') or json_data['error_code']
+                if json_data.get('error_subcode') == 'CLIENT_GEO':
+                    self.raise_geo_restricted(msg=message)
+                raise ExtractorError(message, expected=True)
             raise
 
         title = json_data['name'].strip()
index b1dfacf8094f92493ad6cc95b6fe758b3b81f4fc..dd2529a6dc7742f4a37ad2c37d2c0c20deb97788 100644 (file)
@@ -160,8 +160,7 @@ class CeskaTelevizeIE(InfoExtractor):
                 for format_id, stream_url in item.get('streamUrls', {}).items():
                     if 'playerType=flash' in stream_url:
                         stream_formats = self._extract_m3u8_formats(
-                            stream_url, playlist_id, 'mp4',
-                            entry_protocol='m3u8' if is_live else 'm3u8_native',
+                            stream_url, playlist_id, 'mp4', 'm3u8_native',
                             m3u8_id='hls-%s' % format_id, fatal=False)
                     else:
                         stream_formats = self._extract_mpd_formats(
index 865dbcaba5016eb957c41bac43966e2a75044f0b..e928942465fbae45a0854172b90987ee65b355a0 100644 (file)
@@ -4,62 +4,62 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     ExtractorError,
-    parse_filesize,
+    int_or_none,
+    parse_iso8601,
     qualities,
+    unescapeHTML,
 )
 
 
 class Channel9IE(InfoExtractor):
-    '''
-    Common extractor for channel9.msdn.com.
-
-    The type of provided URL (video or playlist) is determined according to
-    meta Search.PageType from web page HTML rather than URL itself, as it is
-    not always possible to do.
-    '''
     IE_DESC = 'Channel 9'
     IE_NAME = 'channel9'
-    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
+    _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
 
     _TESTS = [{
         'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
-        'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
+        'md5': '32083d4eaf1946db6d454313f44510ca',
         'info_dict': {
-            'id': 'Events/TechEd/Australia/2013/KOS002',
-            'ext': 'mp4',
+            'id': '6c413323-383a-49dc-88f9-a22800cab024',
+            'ext': 'wmv',
             'title': 'Developer Kick-Off Session: Stuff We Love',
-            'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
+            'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
             'duration': 4576,
-            'thumbnail': r're:http://.*\.jpg',
+            'thumbnail': r're:https?://.*\.jpg',
+            'timestamp': 1377717420,
+            'upload_date': '20130828',
             'session_code': 'KOS002',
-            'session_day': 'Day 1',
             'session_room': 'Arena 1A',
-            'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
-                                 'Mads Kristensen'],
+            'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'],
         },
     }, {
         'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
-        'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
+        'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
         'info_dict': {
-            'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
-            'ext': 'mp4',
+            'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
+            'ext': 'wmv',
             'title': 'Self-service BI with Power BI - nuclear testing',
-            'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
+            'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
             'duration': 1540,
-            'thumbnail': r're:http://.*\.jpg',
+            'thumbnail': r're:https?://.*\.jpg',
+            'timestamp': 1386381991,
+            'upload_date': '20131207',
             'authors': ['Mike Wilmot'],
         },
     }, {
         # low quality mp4 is best
         'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
         'info_dict': {
-            'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
+            'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
             'ext': 'mp4',
             'title': 'Ranges for the Standard Library',
-            'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
+            'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
             'duration': 5646,
-            'thumbnail': r're:http://.*\.jpg',
+            'thumbnail': r're:https?://.*\.jpg',
+            'upload_date': '20150930',
+            'timestamp': 1443640735,
         },
         'params': {
             'skip_download': True,
@@ -70,7 +70,7 @@ class Channel9IE(InfoExtractor):
             'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
             'title': 'Channel 9',
         },
-        'playlist_count': 2,
+        'playlist_mincount': 100,
     }, {
         'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
         'only_matching': True,
@@ -81,189 +81,6 @@ class Channel9IE(InfoExtractor):
 
     _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
 
-    def _formats_from_html(self, html):
-        FORMAT_REGEX = r'''
-            (?x)
-            <a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
-            <span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
-            (?:<div\s+class="popup\s+rounded">\s*
-            <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
-            </div>)?                                                # File size part may be missing
-        '''
-        quality = qualities((
-            'MP3', 'MP4',
-            'Low Quality WMV', 'Low Quality MP4',
-            'Mid Quality WMV', 'Mid Quality MP4',
-            'High Quality WMV', 'High Quality MP4'))
-        formats = [{
-            'url': x.group('url'),
-            'format_id': x.group('quality'),
-            'format_note': x.group('note'),
-            'format': '%s (%s)' % (x.group('quality'), x.group('note')),
-            'filesize_approx': parse_filesize(x.group('filesize')),
-            'quality': quality(x.group('quality')),
-            'vcodec': 'none' if x.group('note') == 'Audio only' else None,
-        } for x in list(re.finditer(FORMAT_REGEX, html))]
-
-        self._sort_formats(formats)
-
-        return formats
-
-    def _extract_title(self, html):
-        title = self._html_search_meta('title', html, 'title')
-        if title is None:
-            title = self._og_search_title(html)
-            TITLE_SUFFIX = ' (Channel 9)'
-            if title is not None and title.endswith(TITLE_SUFFIX):
-                title = title[:-len(TITLE_SUFFIX)]
-        return title
-
-    def _extract_description(self, html):
-        DESCRIPTION_REGEX = r'''(?sx)
-            <div\s+class="entry-content">\s*
-            <div\s+id="entry-body">\s*
-            (?P<description>.+?)\s*
-            </div>\s*
-            </div>
-        '''
-        m = re.search(DESCRIPTION_REGEX, html)
-        if m is not None:
-            return m.group('description')
-        return self._html_search_meta('description', html, 'description')
-
-    def _extract_duration(self, html):
-        m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
-        return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None
-
-    def _extract_slides(self, html):
-        m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
-        return m.group('slidesurl') if m is not None else None
-
-    def _extract_zip(self, html):
-        m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
-        return m.group('zipurl') if m is not None else None
-
-    def _extract_avg_rating(self, html):
-        m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
-        return float(m.group('avgrating')) if m is not None else 0
-
-    def _extract_rating_count(self, html):
-        m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
-        return int(self._fix_count(m.group('ratingcount'))) if m is not None else 0
-
-    def _extract_view_count(self, html):
-        m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
-        return int(self._fix_count(m.group('viewcount'))) if m is not None else 0
-
-    def _extract_comment_count(self, html):
-        m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
-        return int(self._fix_count(m.group('commentcount'))) if m is not None else 0
-
-    def _fix_count(self, count):
-        return int(str(count).replace(',', '')) if count is not None else None
-
-    def _extract_authors(self, html):
-        m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
-        if m is None:
-            return None
-        return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1))
-
-    def _extract_session_code(self, html):
-        m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
-        return m.group('code') if m is not None else None
-
-    def _extract_session_day(self, html):
-        m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
-        return m.group('day').strip() if m is not None else None
-
-    def _extract_session_room(self, html):
-        m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
-        return m.group('room') if m is not None else None
-
-    def _extract_session_speakers(self, html):
-        return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)
-
-    def _extract_content(self, html, content_path):
-        # Look for downloadable content
-        formats = self._formats_from_html(html)
-        slides = self._extract_slides(html)
-        zip_ = self._extract_zip(html)
-
-        # Nothing to download
-        if len(formats) == 0 and slides is None and zip_ is None:
-            self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path)
-            return
-
-        # Extract meta
-        title = self._extract_title(html)
-        description = self._extract_description(html)
-        thumbnail = self._og_search_thumbnail(html)
-        duration = self._extract_duration(html)
-        avg_rating = self._extract_avg_rating(html)
-        rating_count = self._extract_rating_count(html)
-        view_count = self._extract_view_count(html)
-        comment_count = self._extract_comment_count(html)
-
-        common = {
-            '_type': 'video',
-            'id': content_path,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'avg_rating': avg_rating,
-            'rating_count': rating_count,
-            'view_count': view_count,
-            'comment_count': comment_count,
-        }
-
-        result = []
-
-        if slides is not None:
-            d = common.copy()
-            d.update({'title': title + '-Slides', 'url': slides})
-            result.append(d)
-
-        if zip_ is not None:
-            d = common.copy()
-            d.update({'title': title + '-Zip', 'url': zip_})
-            result.append(d)
-
-        if len(formats) > 0:
-            d = common.copy()
-            d.update({'title': title, 'formats': formats})
-            result.append(d)
-
-        return result
-
-    def _extract_entry_item(self, html, content_path):
-        contents = self._extract_content(html, content_path)
-        if contents is None:
-            return contents
-
-        if len(contents) > 1:
-            raise ExtractorError('Got more than one entry')
-        result = contents[0]
-        result['authors'] = self._extract_authors(html)
-
-        return result
-
-    def _extract_session(self, html, content_path):
-        contents = self._extract_content(html, content_path)
-        if contents is None:
-            return contents
-
-        session_meta = {
-            'session_code': self._extract_session_code(html),
-            'session_day': self._extract_session_day(html),
-            'session_room': self._extract_session_room(html),
-            'session_speakers': self._extract_session_speakers(html),
-        }
-
-        for content in contents:
-            content.update(session_meta)
-
-        return self.playlist_result(contents)
-
     def _extract_list(self, video_id, rss_url=None):
         if not rss_url:
             rss_url = self._RSS_URL % video_id
@@ -274,9 +91,7 @@ class Channel9IE(InfoExtractor):
         return self.playlist_result(entries, video_id, title_text)
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        content_path = mobj.group('contentpath')
-        rss = mobj.group('rss')
+        content_path, rss = re.match(self._VALID_URL, url).groups()
 
         if rss:
             return self._extract_list(content_path, url)
@@ -284,17 +99,158 @@ class Channel9IE(InfoExtractor):
         webpage = self._download_webpage(
             url, content_path, 'Downloading web page')
 
-        page_type = self._search_regex(
-            r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
-            webpage, 'page type', default=None, group='pagetype')
-        if page_type:
-            if page_type == 'Entry':      # Any 'item'-like page, may contain downloadable content
-                return self._extract_entry_item(webpage, content_path)
-            elif page_type == 'Session':  # Event session page, may contain downloadable content
-                return self._extract_session(webpage, content_path)
-            elif page_type == 'Event':
-                return self._extract_list(content_path)
+        episode_data = self._search_regex(
+            r"data-episode='([^']+)'", webpage, 'episode data', default=None)
+        if episode_data:
+            episode_data = self._parse_json(unescapeHTML(
+                episode_data), content_path)
+            content_id = episode_data['contentId']
+            is_session = '/Sessions(' in episode_data['api']
+            content_url = 'https://channel9.msdn.com/odata' + episode_data['api']
+            if is_session:
+                content_url += '?$expand=Speakers'
+            else:
+                content_url += '?$expand=Authors'
+            content_data = self._download_json(content_url, content_id)
+            title = content_data['Title']
+
+            QUALITIES = (
+                'mp3',
+                'wmv', 'mp4',
+                'wmv-low', 'mp4-low',
+                'wmv-mid', 'mp4-mid',
+                'wmv-high', 'mp4-high',
+            )
+
+            quality_key = qualities(QUALITIES)
+
+            def quality(quality_id, format_url):
+                return (len(QUALITIES) if '_Source.' in format_url
+                        else quality_key(quality_id))
+
+            formats = []
+            urls = set()
+
+            SITE_QUALITIES = {
+                'MP3': 'mp3',
+                'MP4': 'mp4',
+                'Low Quality WMV': 'wmv-low',
+                'Low Quality MP4': 'mp4-low',
+                'Mid Quality WMV': 'wmv-mid',
+                'Mid Quality MP4': 'mp4-mid',
+                'High Quality WMV': 'wmv-high',
+                'High Quality MP4': 'mp4-high',
+            }
+
+            formats_select = self._search_regex(
+                r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
+                'formats select', default=None)
+            if formats_select:
+                for mobj in re.finditer(
+                        r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
+                        formats_select):
+                    format_url = mobj.group('url')
+                    if format_url in urls:
+                        continue
+                    urls.add(format_url)
+                    format_id = mobj.group('format')
+                    quality_id = SITE_QUALITIES.get(format_id, format_id)
+                    formats.append({
+                        'url': format_url,
+                        'format_id': quality_id,
+                        'quality': quality(quality_id, format_url),
+                        'vcodec': 'none' if quality_id == 'mp3' else None,
+                    })
+
+            API_QUALITIES = {
+                'VideoMP4Low': 'mp4-low',
+                'VideoWMV': 'wmv-mid',
+                'VideoMP4Medium': 'mp4-mid',
+                'VideoMP4High': 'mp4-high',
+                'VideoWMVHQ': 'wmv-hq',
+            }
+
+            for format_id, q in API_QUALITIES.items():
+                q_url = content_data.get(format_id)
+                if not q_url or q_url in urls:
+                    continue
+                urls.add(q_url)
+                formats.append({
+                    'url': q_url,
+                    'format_id': q,
+                    'quality': quality(q, q_url),
+                })
+
+            self._sort_formats(formats)
+
+            slides = content_data.get('Slides')
+            zip_file = content_data.get('ZipFile')
+
+            if not formats and not slides and not zip_file:
+                raise ExtractorError(
+                    'None of recording, slides or zip are available for %s' % content_path)
+
+            subtitles = {}
+            for caption in content_data.get('Captions', []):
+                caption_url = caption.get('Url')
+                if not caption_url:
+                    continue
+                subtitles.setdefault(caption.get('Language', 'en'), []).append({
+                    'url': caption_url,
+                    'ext': 'vtt',
+                })
+
+            common = {
+                'id': content_id,
+                'title': title,
+                'description': clean_html(content_data.get('Description') or content_data.get('Body')),
+                'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'),
+                'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
+                'timestamp': parse_iso8601(content_data.get('PublishedDate')),
+                'avg_rating': int_or_none(content_data.get('Rating')),
+                'rating_count': int_or_none(content_data.get('RatingCount')),
+                'view_count': int_or_none(content_data.get('Views')),
+                'comment_count': int_or_none(content_data.get('CommentCount')),
+                'subtitles': subtitles,
+            }
+            if is_session:
+                speakers = []
+                for s in content_data.get('Speakers', []):
+                    speaker_name = s.get('FullName')
+                    if not speaker_name:
+                        continue
+                    speakers.append(speaker_name)
+
+                common.update({
+                    'session_code': content_data.get('Code'),
+                    'session_room': content_data.get('Room'),
+                    'session_speakers': speakers,
+                })
             else:
-                raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
-        else:  # Assuming list
+                authors = []
+                for a in content_data.get('Authors', []):
+                    author_name = a.get('DisplayName')
+                    if not author_name:
+                        continue
+                    authors.append(author_name)
+                common['authors'] = authors
+
+            contents = []
+
+            if slides:
+                d = common.copy()
+                d.update({'title': title + '-Slides', 'url': slides})
+                contents.append(d)
+
+            if zip_file:
+                d = common.copy()
+                d.update({'title': title + '-Zip', 'url': zip_file})
+                contents.append(d)
+
+            if formats:
+                d = common.copy()
+                d.update({'title': title, 'formats': formats})
+                contents.append(d)
+            return self.playlist_result(contents)
+        else:
             return self._extract_list(content_path)
index ae5ba0015a0e5026f2db38eaa103417ddc8ce1da..9bc8dbea449509cf275d10470d6e01f337288a21 100644 (file)
@@ -1,97 +1,56 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from ..compat import (
-    compat_parse_qs,
-    compat_HTTPError,
-)
 from ..utils import (
-    ExtractorError,
-    HEADRequest,
-    remove_end,
+    str_to_int,
+    unified_strdate,
 )
 
 
 class CloudyIE(InfoExtractor):
     _IE_DESC = 'cloudy.ec'
-    _VALID_URL = r'''(?x)
-        https?://(?:www\.)?cloudy\.ec/
-        (?:v/|embed\.php\?id=)
-        (?P<id>[A-Za-z0-9]+)
-        '''
-    _EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
-    _API_URL = 'http://www.cloudy.ec/api/player.api.php'
-    _MAX_TRIES = 2
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
+    _TESTS = [{
         'url': 'https://www.cloudy.ec/v/af511e2527aac',
-        'md5': '5cb253ace826a42f35b4740539bedf07',
+        'md5': '29832b05028ead1b58be86bf319397ca',
         'info_dict': {
             'id': 'af511e2527aac',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Funny Cats and Animals Compilation june 2013',
+            'upload_date': '20130913',
+            'view_count': int,
         }
-    }
-
-    def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
-
-        if try_num > self._MAX_TRIES - 1:
-            raise ExtractorError('Unable to extract video URL', expected=True)
-
-        form = {
-            'file': video_id,
-            'key': file_key,
-        }
-
-        if error_url:
-            form.update({
-                'numOfErrors': try_num,
-                'errorCode': '404',
-                'errorUrl': error_url,
-            })
+    }, {
+        'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
+        'only_matching': True,
+    }]
 
-        player_data = self._download_webpage(
-            self._API_URL, video_id, 'Downloading player data', query=form)
-        data = compat_parse_qs(player_data)
-
-        try_num += 1
-
-        if 'error' in data:
-            raise ExtractorError(
-                '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
-                expected=True)
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
 
-        title = data.get('title', [None])[0]
-        if title:
-            title = remove_end(title, '&asdasdas').strip()
+        webpage = self._download_webpage(
+            'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id)
 
-        video_url = data.get('url', [None])[0]
+        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
 
-        if video_url:
-            try:
-                self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
-            except ExtractorError as e:
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
-                    self.report_warning('Invalid video URL, requesting another', video_id)
-                    return self._extract_video(video_id, file_key, video_url, try_num)
+        webpage = self._download_webpage(
+            'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)
 
-        return {
-            'id': video_id,
-            'url': video_url,
-            'title': title,
-        }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        if webpage:
+            info.update({
+                'title': self._search_regex(
+                    r'<h\d[^>]*>([^<]+)<', webpage, 'title'),
+                'upload_date': unified_strdate(self._search_regex(
+                    r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage,
+                    'upload date', fatal=False)),
+                'view_count': str_to_int(self._search_regex(
+                    r'([\d,.]+) views<', webpage, 'view count', fatal=False)),
+            })
 
-        url = self._EMBED_URL % video_id
-        webpage = self._download_webpage(url, video_id)
+        if not info.get('title'):
+            info['title'] = video_id
 
-        file_key = self._search_regex(
-            [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
-            webpage, 'file_key')
+        info['id'] = video_id
 
-        return self._extract_video(video_id, file_key)
+        return info
index c2ca73ee1756519e90e19fc7512b1a77d834b2ee..6c3c095f78cec4f44951f0424f20c6828e2462d7 100644 (file)
@@ -36,34 +36,35 @@ from ..utils import (
     clean_html,
     compiled_regex_type,
     determine_ext,
+    determine_protocol,
     error_to_compat_str,
     ExtractorError,
+    extract_attributes,
     fix_xml_ampersands,
     float_or_none,
     GeoRestrictedError,
     GeoUtils,
     int_or_none,
     js_to_json,
+    mimetype2ext,
+    orderedSet,
+    parse_codecs,
+    parse_duration,
     parse_iso8601,
+    parse_m3u8_attributes,
     RegexNotFoundError,
-    sanitize_filename,
     sanitized_Request,
+    sanitize_filename,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
+    update_Request,
+    update_url_query,
+    urljoin,
     url_basename,
     xpath_element,
     xpath_text,
     xpath_with_ns,
-    determine_protocol,
-    parse_duration,
-    mimetype2ext,
-    update_Request,
-    update_url_query,
-    parse_m3u8_attributes,
-    extract_attributes,
-    parse_codecs,
-    urljoin,
 )
 
 
@@ -714,6 +715,13 @@ class InfoExtractor(object):
             video_info['title'] = video_title
         return video_info
 
+    def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
+        urlrs = orderedSet(
+            self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
+            for m in matches)
+        return self.playlist_result(
+            urlrs, playlist_id=video_id, playlist_title=video_title)
+
     @staticmethod
     def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
         """Returns a playlist"""
@@ -2204,56 +2212,9 @@ class InfoExtractor(object):
 
             this_video_id = video_id or video_data['mediaid']
 
-            formats = []
-            for source in video_data['sources']:
-                source_url = self._proto_relative_url(source['file'])
-                if base_url:
-                    source_url = compat_urlparse.urljoin(base_url, source_url)
-                source_type = source.get('type') or ''
-                ext = mimetype2ext(source_type) or determine_ext(source_url)
-                if source_type == 'hls' or ext == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
-                elif ext == 'mpd':
-                    formats.extend(self._extract_mpd_formats(
-                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
-                # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
-                elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
-                    formats.append({
-                        'url': source_url,
-                        'vcodec': 'none',
-                        'ext': ext,
-                    })
-                else:
-                    height = int_or_none(source.get('height'))
-                    if height is None:
-                        # Often no height is provided but there is a label in
-                        # format like 1080p.
-                        height = int_or_none(self._search_regex(
-                            r'^(\d{3,})[pP]$', source.get('label') or '',
-                            'height', default=None))
-                    a_format = {
-                        'url': source_url,
-                        'width': int_or_none(source.get('width')),
-                        'height': height,
-                        'ext': ext,
-                    }
-                    if source_url.startswith('rtmp'):
-                        a_format['ext'] = 'flv'
-
-                        # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
-                        # of jwplayer.flash.swf
-                        rtmp_url_parts = re.split(
-                            r'((?:mp4|mp3|flv):)', source_url, 1)
-                        if len(rtmp_url_parts) == 3:
-                            rtmp_url, prefix, play_path = rtmp_url_parts
-                            a_format.update({
-                                'url': rtmp_url,
-                                'play_path': prefix + play_path,
-                            })
-                        if rtmp_params:
-                            a_format.update(rtmp_params)
-                    formats.append(a_format)
+            formats = self._parse_jwplayer_formats(
+                video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
+                mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
             self._sort_formats(formats)
 
             subtitles = {}
@@ -2284,6 +2245,65 @@ class InfoExtractor(object):
         else:
             return self.playlist_result(entries)
 
+    def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
+                                m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+        formats = []
+        for source in jwplayer_sources_data:
+            source_url = self._proto_relative_url(source['file'])
+            if base_url:
+                source_url = compat_urlparse.urljoin(base_url, source_url)
+            source_type = source.get('type') or ''
+            ext = mimetype2ext(source_type) or determine_ext(source_url)
+            if source_type == 'hls' or ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id=m3u8_id, fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    source_url, video_id, mpd_id=mpd_id, fatal=False))
+            elif ext == 'smil':
+                formats.extend(self._extract_smil_formats(
+                    source_url, video_id, fatal=False))
+            # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+            elif source_type.startswith('audio') or ext in (
+                    'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+                formats.append({
+                    'url': source_url,
+                    'vcodec': 'none',
+                    'ext': ext,
+                })
+            else:
+                height = int_or_none(source.get('height'))
+                if height is None:
+                    # Often no height is provided but there is a label in
+                    # format like "1080p", "720p SD", or 1080.
+                    height = int_or_none(self._search_regex(
+                        r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
+                        'height', default=None))
+                a_format = {
+                    'url': source_url,
+                    'width': int_or_none(source.get('width')),
+                    'height': height,
+                    'tbr': int_or_none(source.get('bitrate')),
+                    'ext': ext,
+                }
+                if source_url.startswith('rtmp'):
+                    a_format['ext'] = 'flv'
+                    # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+                    # of jwplayer.flash.swf
+                    rtmp_url_parts = re.split(
+                        r'((?:mp4|mp3|flv):)', source_url, 1)
+                    if len(rtmp_url_parts) == 3:
+                        rtmp_url, prefix, play_path = rtmp_url_parts
+                        a_format.update({
+                            'url': rtmp_url,
+                            'play_path': prefix + play_path,
+                        })
+                    if rtmp_params:
+                        a_format.update(rtmp_params)
+                formats.append(a_format)
+        return formats
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
index 8d8f605980bdf531c49a0c5d5067223d1f41dc4d..d3463b8747a2109e8efef6b93d70843e542b4590 100644 (file)
@@ -9,13 +9,14 @@ from ..compat import (
     compat_urlparse,
 )
 from ..utils import (
-    orderedSet,
-    remove_end,
-    extract_attributes,
-    mimetype2ext,
     determine_ext,
+    extract_attributes,
     int_or_none,
+    js_to_json,
+    mimetype2ext,
+    orderedSet,
     parse_iso8601,
+    remove_end,
 )
 
 
@@ -66,6 +67,16 @@ class CondeNastIE(InfoExtractor):
             'upload_date': '20130314',
             'timestamp': 1363219200,
         }
+    }, {
+        'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series',
+        'info_dict': {
+            'id': '58d1865bfd2e6126e2000015',
+            'ext': 'mp4',
+            'title': 'The Only True Surprise? Trump’s an Idiot',
+            'uploader': 'gq',
+            'upload_date': '20170321',
+            'timestamp': 1490126427,
+        },
     }, {
         # JS embed
         'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
@@ -114,26 +125,33 @@ class CondeNastIE(InfoExtractor):
             })
         video_id = query['videoId']
         video_info = None
-        info_page = self._download_webpage(
+        info_page = self._download_json(
             'http://player.cnevids.com/player/video.js',
-            video_id, 'Downloading video info', query=query, fatal=False)
+            video_id, 'Downloading video info', fatal=False, query=query)
         if info_page:
-            video_info = self._parse_json(self._search_regex(
-                r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video']
-        else:
+            video_info = info_page.get('video')
+        if not video_info:
             info_page = self._download_webpage(
                 'http://player.cnevids.com/player/loader.js',
                 video_id, 'Downloading loader info', query=query)
-            video_info = self._parse_json(self._search_regex(
-                r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id)
+            video_info = self._parse_json(
+                self._search_regex(
+                    r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
+                video_id, transform_source=js_to_json)['video']
+
         title = video_info['title']
 
         formats = []
-        for fdata in video_info.get('sources', [{}])[0]:
+        for fdata in video_info['sources']:
             src = fdata.get('src')
             if not src:
                 continue
             ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+                continue
             quality = fdata.get('quality')
             formats.append({
                 'format_id': ext + ('-%s' % quality if quality else ''),
@@ -169,7 +187,6 @@ class CondeNastIE(InfoExtractor):
                 path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
             url_type = 'embed'
 
-        self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
         webpage = self._download_webpage(url, item_id)
 
         if url_type == 'series':
index 9c6cf00ca1995604fb9ec86f7c1aa2bbc333828a..d15fd3744d5a03e383cb36d0dd9e990d061aa6cb 100644 (file)
@@ -177,6 +177,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             'uploader': 'Kadokawa Pictures Inc.',
             'upload_date': '20170118',
             'series': "KONOSUBA -God's blessing on this wonderful world!",
+            'season': "KONOSUBA -God's blessing on this wonderful world! 2",
             'season_number': 2,
             'episode': 'Give Me Deliverance from this Judicial Injustice!',
             'episode_number': 1,
@@ -222,6 +223,23 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # just test metadata extraction
             'skip_download': True,
         },
+    }, {
+        # A video with a vastly different season name compared to the series name
+        'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
+        'info_dict': {
+            'id': '590532',
+            'ext': 'mp4',
+            'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test',
+            'description': 'Mahiro and Nyaruko talk about official certification.',
+            'uploader': 'TV TOKYO',
+            'upload_date': '20120305',
+            'series': 'Nyarko-san: Another Crawling Chaos',
+            'season': 'Haiyoru! Nyaruani (ONA)',
+        },
+        'params': {
+            # Just test metadata extraction
+            'skip_download': True,
+        },
     }]
 
     _FORMAT_IDS = {
@@ -491,7 +509,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         # webpage provide more accurate data than series_title from XML
         series = self._html_search_regex(
             r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)',
-            webpage, 'series', default=xpath_text(metadata, 'series_title'))
+            webpage, 'series', fatal=False)
+        season = xpath_text(metadata, 'series_title')
 
         episode = xpath_text(metadata, 'episode_title')
         episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
@@ -508,6 +527,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             'uploader': video_uploader,
             'upload_date': video_upload_date,
             'series': series,
+            'season': season,
             'season_number': season_number,
             'episode': episode,
             'episode_number': episode_number,
index 2042493a8c7836ecae4efd23005101cf805116a7..7cd5d4291668705f12fdd973756e36a1ace1cc9f 100644 (file)
@@ -1,17 +1,21 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     extract_attributes,
+    ExtractorError,
     int_or_none,
     parse_age_limit,
-    ExtractorError,
+    remove_end,
+    unescapeHTML,
 )
 
 
-class DiscoveryGoIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
+class DiscoveryGoBaseIE(InfoExtractor):
+    _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
             discovery|
             investigationdiscovery|
             discoverylife|
@@ -21,18 +25,23 @@ class DiscoveryGoIE(InfoExtractor):
             sciencechannel|
             tlc|
             velocitychannel
-        )go\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'''
+        )go\.com/%s(?P<id>[^/?#&]+)'''
+
+
+class DiscoveryGoIE(DiscoveryGoBaseIE):
+    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
+    _GEO_COUNTRIES = ['US']
     _TEST = {
-        'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/',
+        'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
         'info_dict': {
-            'id': '57a33c536b66d1cd0345eeb1',
+            'id': '58c167d86b66d12f2addeb01',
             'ext': 'mp4',
-            'title': 'Kiss First, Ask Questions Later!',
-            'description': 'md5:fe923ba34050eae468bffae10831cb22',
-            'duration': 2579,
-            'series': 'Love at First Kiss',
-            'season_number': 1,
-            'episode_number': 1,
+            'title': 'Reaper Madness',
+            'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
+            'duration': 2519,
+            'series': 'Bering Sea Gold',
+            'season_number': 8,
+            'episode_number': 6,
             'age_limit': 14,
         },
     }
@@ -113,3 +122,46 @@ class DiscoveryGoIE(InfoExtractor):
             'formats': formats,
             'subtitles': subtitles,
         }
+
+
+class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
+    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
+    _TEST = {
+        'url': 'https://www.discoverygo.com/bering-sea-gold/',
+        'info_dict': {
+            'id': 'bering-sea-gold',
+            'title': 'Bering Sea Gold',
+            'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
+        },
+        'playlist_mincount': 6,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return False if DiscoveryGoIE.suitable(url) else super(
+            DiscoveryGoPlaylistIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        entries = []
+        for mobj in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage):
+            data = self._parse_json(
+                mobj.group('json'), display_id,
+                transform_source=unescapeHTML, fatal=False)
+            if not isinstance(data, dict) or data.get('type') != 'episode':
+                continue
+            episode_url = data.get('socialUrl')
+            if not episode_url:
+                continue
+            entries.append(self.url_result(
+                episode_url, ie=DiscoveryGoIE.ie_key(),
+                video_id=data.get('id')))
+
+        return self.playlist_result(
+            entries, display_id,
+            remove_end(self._og_search_title(
+                webpage, fatal=False), ' | Discovery GO'),
+            self._og_search_description(webpage))
diff --git a/youtube_dl/extractor/discoverynetworks.py b/youtube_dl/extractor/discoverynetworks.py
new file mode 100644 (file)
index 0000000..b665378
--- /dev/null
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveLegacyIE
+from ..compat import (
+    compat_parse_qs,
+    compat_urlparse,
+)
+from ..utils import smuggle_url
+
+
+class DiscoveryNetworksDeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))'
+
+    _TESTS = [{
+        'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
+        'info_dict': {
+            'id': '3235167922001',
+            'ext': 'mp4',
+            'title': 'Breaking Amish: Die Welt da draußen',
+            'description': (
+                'Vier Amische und eine Mennonitin wagen in New York'
+                '  den Sprung in ein komplett anderes Leben. Begleitet sie auf'
+                ' ihrem spannenden Weg.'),
+            'timestamp': 1396598084,
+            'upload_date': '20140404',
+            'uploader_id': '1659832546',
+        },
+    }, {
+        'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.discovery.de/#5332316765001',
+        'only_matching': True,
+    }]
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        brightcove_id = mobj.group('id')
+        if not brightcove_id:
+            title = mobj.group('title')
+            webpage = self._download_webpage(url, title)
+            brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+            brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
+                brightcove_legacy_url).query)['@videoPlayer'][0]
+        return self.url_result(smuggle_url(
+            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}),
+            'BrightcoveNew', brightcove_id)
index 9a83fb31ab57dd57a49c09bee6c2a8d3be7583ca..82d8a042f58e738422a4c64b7fa90f8ac127f5fb 100644 (file)
@@ -1,6 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import time
+import hashlib
+
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
@@ -16,7 +19,7 @@ class DouyuTVIE(InfoExtractor):
         'info_dict': {
             'id': '17732',
             'display_id': 'iseven',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'description': r're:.*m7show@163\.com.*',
             'thumbnail': r're:^https?://.*\.jpg$',
@@ -31,7 +34,7 @@ class DouyuTVIE(InfoExtractor):
         'info_dict': {
             'id': '85982',
             'display_id': '85982',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
             'thumbnail': r're:^https?://.*\.jpg$',
@@ -47,7 +50,7 @@ class DouyuTVIE(InfoExtractor):
         'info_dict': {
             'id': '17732',
             'display_id': '17732',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'description': r're:.*m7show@163\.com.*',
             'thumbnail': r're:^https?://.*\.jpg$',
@@ -66,10 +69,6 @@ class DouyuTVIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
-    # is encrypted originally, but ffdec can dump memory to get the decrypted one.
-    _API_KEY = 'A12Svb&%1UUmf@hC'
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -80,6 +79,7 @@ class DouyuTVIE(InfoExtractor):
             room_id = self._html_search_regex(
                 r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
 
+        # Grab metadata from mobile API
         room = self._download_json(
             'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
             note='Downloading room info')['data']
@@ -88,8 +88,19 @@ class DouyuTVIE(InfoExtractor):
         if room.get('show_status') == '2':
             raise ExtractorError('Live stream is offline', expected=True)
 
-        formats = self._extract_m3u8_formats(
-            room['hls_url'], video_id, ext='mp4')
+        # Grab the URL from PC client API
+        # The m3u8 url from mobile API requires re-authentication every 5 minutes
+        tt = int(time.time())
+        signContent = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%d9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt)
+        sign = hashlib.md5(signContent.encode('ascii')).hexdigest()
+        video_url = self._download_json(
+            'http://coapi.douyucdn.cn/lapi/live/thirdPart/getPlay/' + room_id,
+            video_id, note='Downloading video URL info',
+            query={'rate': 0}, headers={
+                'auth': sign,
+                'time': str(tt),
+                'aid': 'pcclient'
+            })['data']['live_url']
 
         title = self._live_title(unescapeHTML(room['room_name']))
         description = room.get('show_details')
@@ -99,7 +110,7 @@ class DouyuTVIE(InfoExtractor):
         return {
             'id': room_id,
             'display_id': video_id,
-            'formats': formats,
+            'url': video_url,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
index 32028bc3b79b61d249ad4bccaebadf745f9f942a..87c5dd63ec271a6ef8770c2233da295721f863ca 100644 (file)
@@ -6,37 +6,24 @@ import re
 import time
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_urlparse,
+    compat_HTTPError,
+)
 from ..utils import (
     USER_AGENTS,
+    ExtractorError,
     int_or_none,
+    unified_strdate,
+    remove_end,
     update_url_query,
 )
 
 
 class DPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
 
     _TESTS = [{
-        # geo restricted, via direct unsigned hls URL
-        'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',
-        'info_dict': {
-            'id': '1255600',
-            'display_id': 'stagione-1-episodio-25',
-            'ext': 'mp4',
-            'title': 'Episodio 25',
-            'description': 'md5:cae5f40ad988811b197d2d27a53227eb',
-            'duration': 2761,
-            'timestamp': 1454701800,
-            'upload_date': '20160205',
-            'creator': 'RTIT',
-            'series': 'Take me out',
-            'season_number': 1,
-            'episode_number': 25,
-            'age_limit': 0,
-        },
-        'expected_warnings': ['Unable to download f4m manifest'],
-    }, {
         # non geo restricted, via secure api, unsigned download hls URL
         'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
         'info_dict': {
@@ -168,3 +155,90 @@ class DPlayIE(InfoExtractor):
             'formats': formats,
             'subtitles': subtitles,
         }
+
+
+class DPlayItIE(InfoExtractor):
+    # Extractor for episode pages on it.dplay.com. The page embeds a
+    # videoPlaybackInfo API URL; that endpoint is queried with the
+    # dplayit_token cookie as a bearer token and its JSON response carries
+    # the HLS manifest URL. Series/episode metadata is scraped from the HTML.
+    _VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)'
+    _GEO_COUNTRIES = ['IT']
+    _TEST = {
+        'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
+        'md5': '2b808ffb00fc47b884a172ca5d13053c',
+        'info_dict': {
+            'id': '6918',
+            'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij',
+            'ext': 'mp4',
+            'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij',
+            'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
+            'thumbnail': r're:^https?://.*\.jpe?g',
+            'upload_date': '20160524',
+            'series': 'Biografie imbarazzanti',
+            'season_number': 1,
+            'episode': 'Luigi Di Maio: la psicosi di Stanislawskij',
+            'episode_number': 1,
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        info_url = self._search_regex(
+            r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
+            webpage, 'video id')
+
+        title = remove_end(self._og_search_title(webpage), ' | Dplay')
+
+        # The playback API is authorized with the dplayit_token cookie
+        # (presumably set while downloading the page above). Report a clear
+        # extractor error instead of an AttributeError on None when the
+        # cookie is not available.
+        token = self._get_cookies(url).get('dplayit_token')
+        if token is None:
+            raise ExtractorError('Unable to find dplayit_token cookie')
+
+        try:
+            info = self._download_json(
+                info_url, display_id, headers={
+                    'Authorization': 'Bearer %s' % token.value,
+                    'Referer': url,
+                })
+        except ExtractorError as e:
+            # On HTTP 400/403 the response body is parsed as JSON and its
+            # first error entry is surfaced to the user.
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
+                info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
+                error = info['errors'][0]
+                if error.get('code') == 'access.denied.geoblocked':
+                    self.raise_geo_restricted(
+                        msg=error.get('detail'), countries=self._GEO_COUNTRIES)
+                raise ExtractorError(error['detail'], expected=True)
+            raise
+
+        hls_url = info['data']['attributes']['streaming']['hls']['url']
+
+        formats = self._extract_m3u8_formats(
+            hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+
+        series = self._html_search_regex(
+            r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
+            webpage, 'series', fatal=False)
+        episode = self._search_regex(
+            r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)',
+            webpage, 'episode', fatal=False)
+
+        # Season number, episode number and date are matched from a single
+        # "S.<n> E.<n> - dd/mm/yyyy" label; all three stay None when the
+        # label is not found.
+        mobj = re.search(
+            r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})',
+            webpage)
+        if mobj:
+            season_number = int(mobj.group('season_number'))
+            episode_number = int(mobj.group('episode_number'))
+            upload_date = unified_strdate(mobj.group('upload_date'))
+        else:
+            season_number = episode_number = upload_date = None
+
+        return {
+            # The numeric id is the last path component of the playback URL.
+            'id': info_url.rpartition('/')[-1],
+            'display_id': display_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'series': series,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
+            'upload_date': upload_date,
+            'formats': formats,
+        }
index e966d7483cdc2193cfc96d2bdd808c90e515f821..e4917014adae2e3fe286acc2a2cb66b6ddc3c735 100644 (file)
@@ -15,6 +15,8 @@ from ..utils import (
 
 class DRTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
+    _GEO_BYPASS = False
+    _GEO_COUNTRIES = ['DK']
     IE_NAME = 'drtv'
     _TESTS = [{
         'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
@@ -137,7 +139,7 @@ class DRTVIE(InfoExtractor):
         if not formats and restricted_to_denmark:
             self.raise_geo_restricted(
                 'Unfortunately, DR is not allowed to show this program outside Denmark.',
-                expected=True)
+                countries=self._GEO_COUNTRIES)
 
         self._sort_formats(formats)
 
@@ -156,6 +158,7 @@ class DRTVIE(InfoExtractor):
 class DRTVLiveIE(InfoExtractor):
     IE_NAME = 'drtv:live'
     _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
+    _GEO_COUNTRIES = ['DK']
     _TEST = {
         'url': 'https://www.dr.dk/tv/live/dr1',
         'info_dict': {
index b1613a9d350721bfbcf73ae94726cc75ec9a13df..6a7028a4db9486608bd478e84978b563e0364736 100644 (file)
@@ -71,6 +71,7 @@ from .arte import (
 )
 from .atresplayer import AtresPlayerIE
 from .atttechchannel import ATTTechChannelIE
+from .atvat import ATVAtIE
 from .audimedia import AudiMediaIE
 from .audioboom import AudioBoomIE
 from .audiomack import AudiomackIE, AudiomackAlbumIE
@@ -117,6 +118,7 @@ from .bleacherreport import (
 from .blinkx import BlinkxIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
+from .bostonglobe import BostonGlobeIE
 from .bpb import BpbIE
 from .br import BRIE
 from .bravotv import BravoTVIE
@@ -246,7 +248,10 @@ from .dfb import DFBIE
 from .dhm import DHMIE
 from .dotsub import DotsubIE
 from .douyutv import DouyuTVIE
-from .dplay import DPlayIE
+from .dplay import (
+    DPlayIE,
+    DPlayItIE,
+)
 from .dramafever import (
     DramaFeverIE,
     DramaFeverSeriesIE,
@@ -262,7 +267,11 @@ from .dvtv import DVTVIE
 from .dumpert import DumpertIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
-from .discoverygo import DiscoveryGoIE
+from .discoverygo import (
+    DiscoveryGoIE,
+    DiscoveryGoPlaylistIE,
+)
+from .discoverynetworks import DiscoveryNetworksDeIE
 from .disney import DisneyIE
 from .dispeak import DigitallySpeakingIE
 from .dropbox import DropboxIE
@@ -793,6 +802,7 @@ from .rai import (
 )
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
+from .redbulltv import RedBullTVIE
 from .redtube import RedTubeIE
 from .regiotv import RegioTVIE
 from .rentv import (
@@ -966,7 +976,6 @@ from .thisav import ThisAVIE
 from .thisoldhouse import ThisOldHouseIE
 from .threeqsdn import ThreeQSDNIE
 from .tinypic import TinyPicIE
-from .tlc import TlcDeIE
 from .tmz import (
     TMZIE,
     TMZArticleIE,
@@ -979,6 +988,7 @@ from .tnaflix import (
 )
 from .toggle import ToggleIE
 from .tonline import TOnlineIE
+from .toongoggles import ToonGogglesIE
 from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
@@ -999,6 +1009,7 @@ from .tunein import (
     TuneInTopicIE,
     TuneInShortenerIE,
 )
+from .tunepk import TunePkIE
 from .turbo import TurboIE
 from .tutv import TutvIE
 from .tv2 import (
@@ -1165,6 +1176,8 @@ from .voicerepublic import VoiceRepublicIE
 from .voxmedia import VoxMediaIE
 from .vporn import VpornIE
 from .vrt import VRTIE
+from .vrak import VrakIE
+from .medialaan import MedialaanIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
 from .vvvvid import VVVVIDIE
index 2f3035147777550aded9378d99dbd3fea2c5ba75..f62ddebaeee5426a8f8486edbdfff3bba881d0c3 100644 (file)
@@ -54,7 +54,7 @@ class EyedoTVIE(InfoExtractor):
             'id': video_id,
             'title': title,
             'formats': self._extract_m3u8_formats(
-                m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
+                m3u8_url, video_id, 'mp4', 'm3u8_native'),
             'description': xpath_text(video_data, _add_ns('Description')),
             'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
             'uploader': xpath_text(video_data, _add_ns('Createur')),
index 70b8c95c5074dda5c553f1c1d1869165320a4707..b69c1ede0046d73e31df2098f78cf6dc20c254d4 100644 (file)
@@ -196,6 +196,10 @@ class FacebookIE(InfoExtractor):
     }, {
         'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
         'only_matching': True,
+    }, {
+        # no title
+        'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
+        'only_matching': True,
     }]
 
     @staticmethod
@@ -303,7 +307,7 @@ class FacebookIE(InfoExtractor):
         if not video_data:
             server_js_data = self._parse_json(
                 self._search_regex(
-                    r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)',
+                    r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
                     webpage, 'js data', default='{}'),
                 video_id, transform_source=js_to_json, fatal=False)
             if server_js_data:
@@ -353,15 +357,15 @@ class FacebookIE(InfoExtractor):
         self._sort_formats(formats)
 
         video_title = self._html_search_regex(
-            r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
-            default=None)
+            r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
+            'title', default=None)
         if not video_title:
             video_title = self._html_search_regex(
                 r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
                 webpage, 'alternative title', default=None)
         if not video_title:
             video_title = self._html_search_meta(
-                'description', webpage, 'title')
+                'description', webpage, 'title', default=None)
         if video_title:
             video_title = limit_length(video_title, 80)
         else:
index 9f2e5d0652a3266c08e83567a3b0f650ec624720..159fdf9c476df24e15c2f02fcce513922acf85ca 100644 (file)
@@ -47,9 +47,12 @@ class FOXIE(AdobePassIE):
             resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
             query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
 
-        return {
+        info = self._search_json_ld(webpage, video_id, fatal=False)
+        info.update({
             '_type': 'url_transparent',
             'ie_key': 'ThePlatform',
             'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
             'id': video_id,
-        }
+        })
+
+        return info
index b98da692cb23ccc1a6de7a8657f0d8331640280f..b8fa175880f47d6050e4fa3908994bd9777131ac 100644 (file)
@@ -4,7 +4,8 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
-    unified_strdate,
+    extract_attributes,
+    int_or_none,
 )
 
 
@@ -19,6 +20,7 @@ class FranceCultureIE(InfoExtractor):
             'title': 'Rendez-vous au pays des geeks',
             'thumbnail': r're:^https?://.*\.jpg$',
             'upload_date': '20140301',
+            'timestamp': 1393642916,
             'vcodec': 'none',
         }
     }
@@ -28,30 +30,34 @@ class FranceCultureIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
-        video_url = self._search_regex(
-            r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
-            webpage, 'video path')
+        video_data = extract_attributes(self._search_regex(
+            r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
+            webpage, 'video data'))
 
-        title = self._og_search_title(webpage)
+        video_url = video_data['data-asset-source']
+        title = video_data.get('data-asset-title') or self._og_search_title(webpage)
 
-        upload_date = unified_strdate(self._search_regex(
-            '(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
-            webpage, 'upload date', fatal=False))
+        description = self._html_search_regex(
+            r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
+            webpage, 'description', default=None)
         thumbnail = self._search_regex(
-            r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
+            r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
             webpage, 'thumbnail', fatal=False)
         uploader = self._html_search_regex(
-            r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
+            r'(?s)<span class="author">(.*?)</span>',
             webpage, 'uploader', default=None)
-        vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
+        ext = determine_ext(video_url.lower())
 
         return {
             'id': display_id,
             'display_id': display_id,
             'url': video_url,
             'title': title,
+            'description': description,
             'thumbnail': thumbnail,
-            'vcodec': vcodec,
+            'ext': ext,
+            'vcodec': 'none' if ext == 'mp3' else None,
             'uploader': uploader,
-            'upload_date': upload_date,
+            'timestamp': int_or_none(video_data.get('data-asset-created-date')),
+            'duration': int_or_none(video_data.get('data-duration')),
         }
index a90f9156c694be3395dd505c6015f595fdb797b5..72a8459453bb2702df66664eec2ff16c0c78f1a9 100644 (file)
@@ -56,9 +56,8 @@ class FreshLiveIE(InfoExtractor):
         is_live = info.get('liveStreamUrl') is not None
 
         formats = self._extract_m3u8_formats(
-            stream_url, video_id, ext='mp4',
-            entry_protocol='m3u8' if is_live else 'm3u8_native',
-            m3u8_id='hls')
+            stream_url, video_id, 'mp4',
+            'm3u8_native', m3u8_id='hls')
 
         if is_live:
             title = self._live_title(title)
index 3fe0237b6ebb2c6821660f4cd671304be5768ee8..274f817384d65a6287427ad35f97d3126cea7cd0 100644 (file)
@@ -84,6 +84,7 @@ from .twentymin import TwentyMinutenIE
 from .ustream import UstreamIE
 from .openload import OpenloadIE
 from .videopress import VideoPressIE
+from .rutube import RutubeIE
 
 
 class GenericIE(InfoExtractor):
@@ -448,6 +449,23 @@ class GenericIE(InfoExtractor):
                 },
             }],
         },
+        {
+            # Brightcove with UUID in videoPlayer
+            'url': 'http://www8.hp.com/cn/zh/home.html',
+            'info_dict': {
+                'id': '5255815316001',
+                'ext': 'mp4',
+                'title': 'Sprocket Video - China',
+                'description': 'Sprocket Video - China',
+                'uploader': 'HP-Video Gallery',
+                'timestamp': 1482263210,
+                'upload_date': '20161220',
+                'uploader_id': '1107601872001',
+            },
+            'params': {
+                'skip_download': True,  # m3u8 download
+            },
+        },
         # ooyala video
         {
             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
@@ -884,12 +902,13 @@ class GenericIE(InfoExtractor):
         },
         # LazyYT
         {
-            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
+            'url': 'https://skiplagged.com/',
             'info_dict': {
-                'id': '1986',
-                'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
+                'id': 'skiplagged',
+                'title': 'Skiplagged: The smart way to find cheap flights',
             },
-            'playlist_mincount': 2,
+            'playlist_mincount': 1,
+            'add_ie': ['Youtube'],
         },
         # Cinchcast embed
         {
@@ -1516,11 +1535,39 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': [VideoPressIE.ie_key()],
         },
+        {
+            # Rutube embed
+            'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
+            'info_dict': {
+                'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
+                'ext': 'flv',
+                'title': 'Магаззино: Казань 2',
+                'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
+                'uploader': 'Магаззино',
+                'upload_date': '20170228',
+                'uploader_id': '996642',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': [RutubeIE.ie_key()],
+        },
         {
             # ThePlatform embedded with whitespaces in URLs
             'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
             'only_matching': True,
         },
+        {
+            # Senate ISVP iframe https
+            'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
+            'md5': 'fb8c70b0b515e5037981a2492099aab8',
+            'info_dict': {
+                'id': 'govtaff020316',
+                'ext': 'mp4',
+                'title': 'Integrated Senate Video Player',
+            },
+            'add_ie': [SenateISVPIE.ie_key()],
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -1820,14 +1867,6 @@ class GenericIE(InfoExtractor):
         video_description = self._og_search_description(webpage, default=None)
         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
 
-        # Helper method
-        def _playlist_from_matches(matches, getter=None, ie=None):
-            urlrs = orderedSet(
-                self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
-                for m in matches)
-            return self.playlist_result(
-                urlrs, playlist_id=video_id, playlist_title=video_title)
-
         # Look for Brightcove Legacy Studio embeds
         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
         if bc_urls:
@@ -1848,28 +1887,28 @@ class GenericIE(InfoExtractor):
         # Look for Brightcove New Studio embeds
         bc_urls = BrightcoveNewIE._extract_urls(webpage)
         if bc_urls:
-            return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
+            return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
 
         # Look for ThePlatform embeds
         tp_urls = ThePlatformIE._extract_urls(webpage)
         if tp_urls:
-            return _playlist_from_matches(tp_urls, ie='ThePlatform')
+            return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
 
         # Look for Vessel embeds
         vessel_urls = VesselIE._extract_urls(webpage)
         if vessel_urls:
-            return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
+            return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
 
         # Look for embedded rtl.nl player
         matches = re.findall(
             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
             webpage)
         if matches:
-            return _playlist_from_matches(matches, ie='RtlNl')
+            return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
 
         vimeo_urls = VimeoIE._extract_urls(url, webpage)
         if vimeo_urls:
-            return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
+            return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
 
         vid_me_embed_url = self._search_regex(
             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
@@ -1891,25 +1930,25 @@ class GenericIE(InfoExtractor):
                 (?:embed|v|p)/.+?)
             \1''', webpage)
         if matches:
-            return _playlist_from_matches(
-                matches, lambda m: unescapeHTML(m[1]))
+            return self.playlist_from_matches(
+                matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
 
         # Look for lazyYT YouTube embed
         matches = re.findall(
             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
         if matches:
-            return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
+            return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
 
         # Look for Wordpress "YouTube Video Importer" plugin
         matches = re.findall(r'''(?x)<div[^>]+
             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
         if matches:
-            return _playlist_from_matches(matches, lambda m: m[-1])
+            return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
 
         matches = DailymotionIE._extract_urls(webpage)
         if matches:
-            return _playlist_from_matches(matches)
+            return self.playlist_from_matches(matches, video_id, video_title)
 
         # Look for embedded Dailymotion playlist player (#3822)
         m = re.search(
@@ -1918,8 +1957,8 @@ class GenericIE(InfoExtractor):
             playlists = re.findall(
                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
             if playlists:
-                return _playlist_from_matches(
-                    playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
+                return self.playlist_from_matches(
+                    playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
 
         # Look for embedded Wistia player
         match = re.search(
@@ -2026,8 +2065,9 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
             if embeds:
-                return _playlist_from_matches(
-                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
+                return self.playlist_from_matches(
+                    embeds, video_id, video_title,
+                    getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
 
         # Look for Aparat videos
         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@ -2089,13 +2129,13 @@ class GenericIE(InfoExtractor):
         # Look for funnyordie embed
         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
         if matches:
-            return _playlist_from_matches(
-                matches, getter=unescapeHTML, ie='FunnyOrDie')
+            return self.playlist_from_matches(
+                matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
 
         # Look for BBC iPlayer embed
         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
         if matches:
-            return _playlist_from_matches(matches, ie='BBCCoUk')
+            return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
 
         # Look for embedded RUTV player
         rutv_url = RUTVIE._extract_url(webpage)
@@ -2110,32 +2150,32 @@ class GenericIE(InfoExtractor):
         # Look for embedded SportBox player
         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
         if sportbox_urls:
-            return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
+            return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
 
         # Look for embedded XHamster player
         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
         if xhamster_urls:
-            return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
+            return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
 
         # Look for embedded TNAFlixNetwork player
         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
         if tnaflix_urls:
-            return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
+            return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
 
         # Look for embedded PornHub player
         pornhub_urls = PornHubIE._extract_urls(webpage)
         if pornhub_urls:
-            return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
+            return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
 
         # Look for embedded DrTuber player
         drtuber_urls = DrTuberIE._extract_urls(webpage)
         if drtuber_urls:
-            return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
+            return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
 
         # Look for embedded RedTube player
         redtube_urls = RedTubeIE._extract_urls(webpage)
         if redtube_urls:
-            return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
+            return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
 
         # Look for embedded Tvigle player
         mobj = re.search(
@@ -2181,12 +2221,12 @@ class GenericIE(InfoExtractor):
         # Look for embedded soundcloud player
         soundcloud_urls = SoundcloudIE._extract_urls(webpage)
         if soundcloud_urls:
-            return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+            return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
 
         # Look for tunein player
         tunein_urls = TuneInBaseIE._extract_urls(webpage)
         if tunein_urls:
-            return _playlist_from_matches(tunein_urls)
+            return self.playlist_from_matches(tunein_urls, video_id, video_title)
 
         # Look for embedded mtvservices player
         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
@@ -2469,30 +2509,36 @@ class GenericIE(InfoExtractor):
         # Look for DBTV embeds
         dbtv_urls = DBTVIE._extract_urls(webpage)
         if dbtv_urls:
-            return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
+            return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
 
         # Look for Videa embeds
         videa_urls = VideaIE._extract_urls(webpage)
         if videa_urls:
-            return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
+            return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
 
         # Look for 20 minuten embeds
         twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
         if twentymin_urls:
-            return _playlist_from_matches(
-                twentymin_urls, ie=TwentyMinutenIE.ie_key())
+            return self.playlist_from_matches(
+                twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
 
         # Look for Openload embeds
         openload_urls = OpenloadIE._extract_urls(webpage)
         if openload_urls:
-            return _playlist_from_matches(
-                openload_urls, ie=OpenloadIE.ie_key())
+            return self.playlist_from_matches(
+                openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
 
         # Look for VideoPress embeds
         videopress_urls = VideoPressIE._extract_urls(webpage)
         if videopress_urls:
-            return _playlist_from_matches(
-                videopress_urls, ie=VideoPressIE.ie_key())
+            return self.playlist_from_matches(
+                videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
+
+        # Look for Rutube embeds
+        rutube_urls = RutubeIE._extract_urls(webpage)
+        if rutube_urls:
+            return self.playlist_from_matches(
+                rutube_urls, ie=RutubeIE.ie_key())
 
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
@@ -2521,7 +2567,11 @@ class GenericIE(InfoExtractor):
         jwplayer_data = self._find_jwplayer_data(
             webpage, video_id, transform_source=js_to_json)
         if jwplayer_data:
-            return self._parse_jwplayer_data(jwplayer_data, video_id)
+            info = self._parse_jwplayer_data(
+                jwplayer_data, video_id, require_title=False)
+            if not info.get('title'):
+                info['title'] = video_title
+            return info
 
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
@@ -2596,11 +2646,14 @@ class GenericIE(InfoExtractor):
                     found = re.search(REDIRECT_REGEX, refresh_header)
             if found:
                 new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
-                self.report_following_redirect(new_url)
-                return {
-                    '_type': 'url',
-                    'url': new_url,
-                }
+                if new_url != url:
+                    self.report_following_redirect(new_url)
+                    return {
+                        '_type': 'url',
+                        'url': new_url,
+                    }
+                else:
+                    found = None
 
         if not found:
             # twitter:player is a https URL to iframe player that may or may not
index 21ed846b25c16df23de22897b1110fb0ab6ff6dd..4c9be47b4faa0e67baa305d22aadbf43387650bc 100644 (file)
@@ -36,7 +36,7 @@ class GoIE(AdobePassIE):
             'requestor_id': 'DisneyXD',
         }
     }
-    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
+    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
     _TESTS = [{
         'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
         'info_dict': {
@@ -52,6 +52,12 @@ class GoIE(AdobePassIE):
     }, {
         'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
         'only_matching': True,
+    }, {
+        'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
+        'only_matching': True,
+    }, {
+        'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 8116ad9bd42f840bc5875070d5f40e8d904b7abb..931f71a5a3c101d266d06c2d07815dda11fb1dba 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     xpath_text,
     xpath_element,
@@ -14,14 +15,26 @@ from ..utils import (
 
 class HBOBaseIE(InfoExtractor):
     _FORMATS_INFO = {
+        'pro7': {
+            'width': 1280,
+            'height': 720,
+        },
         '1920': {
             'width': 1280,
             'height': 720,
         },
+        'pro6': {
+            'width': 768,
+            'height': 432,
+        },
         '640': {
             'width': 768,
             'height': 432,
         },
+        'pro5': {
+            'width': 640,
+            'height': 360,
+        },
         'highwifi': {
             'width': 640,
             'height': 360,
@@ -78,6 +91,17 @@ class HBOBaseIE(InfoExtractor):
                     formats.extend(self._extract_m3u8_formats(
                         video_url.replace('.tar', '/base_index_w8.m3u8'),
                         video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+                elif source.tag == 'hls':
+                    # #EXT-X-BYTERANGE is not supported by native hls downloader
+                    # and ffmpeg (#10955)
+                    # formats.extend(self._extract_m3u8_formats(
+                    #     video_url.replace('.tar', '/base_index.m3u8'),
+                    #     video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+                    continue
+                elif source.tag == 'dash':
+                    formats.extend(self._extract_mpd_formats(
+                        video_url.replace('.tar', '/manifest.mpd'),
+                        video_id, mpd_id='dash', fatal=False))
                 else:
                     format_info = self._FORMATS_INFO.get(source.tag, {})
                     formats.append({
@@ -112,10 +136,11 @@ class HBOBaseIE(InfoExtractor):
 
 
 class HBOIE(HBOBaseIE):
+    IE_NAME = 'hbo'
     _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
-        'md5': '1c33253f0c7782142c993c0ba62a8753',
+        'md5': '2c6a6bc1222c7e91cb3334dad1746e5a',
         'info_dict': {
             'id': '1437839',
             'ext': 'mp4',
@@ -131,11 +156,12 @@ class HBOIE(HBOBaseIE):
 
 
 class HBOEpisodeIE(HBOBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
+    IE_NAME = 'hbo:episode'
+    _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P<path>(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P<id>[0-9a-z-]+))(?:\.html)?'
 
     _TESTS = [{
         'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
-        'md5': '689132b253cc0ab7434237fc3a293210',
+        'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb',
         'info_dict': {
             'id': '1439518',
             'display_id': 'ep-52-inside-the-episode',
@@ -147,16 +173,19 @@ class HBOEpisodeIE(HBOBaseIE):
     }, {
         'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
         'only_matching': True,
+    }, {
+        'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        path, display_id = re.match(self._VALID_URL, url).groups()
 
-        webpage = self._download_webpage(url, display_id)
+        content = self._download_json(
+            'http://www.hbo.com/api/content/' + path, display_id)['content']
 
-        video_id = self._search_regex(
-            r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)',
-            webpage, 'video ID', group='video_id')
+        video_id = compat_str((content.get('parsed', {}).get(
+            'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId'])
 
         info_dict = self._extract_from_id(video_id)
         info_dict['display_id'] = display_id
index c863413bf008baa6baf0233b8185a10ea119d091..7f946c6ed9d64c54670d7fba68058144bcab494b 100644 (file)
@@ -119,7 +119,8 @@ class LivestreamIE(InfoExtractor):
         m3u8_url = video_data.get('m3u8_url')
         if m3u8_url:
             formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+                m3u8_url, video_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False))
 
         f4m_url = video_data.get('f4m_url')
         if f4m_url:
@@ -158,11 +159,11 @@ class LivestreamIE(InfoExtractor):
         if smil_url:
             formats.extend(self._extract_smil_formats(smil_url, broadcast_id))
 
-        entry_protocol = 'm3u8' if is_live else 'm3u8_native'
         m3u8_url = stream_info.get('m3u8_url')
         if m3u8_url:
             formats.extend(self._extract_m3u8_formats(
-                m3u8_url, broadcast_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False))
+                m3u8_url, broadcast_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False))
 
         rtsp_url = stream_info.get('rtsp_url')
         if rtsp_url:
@@ -276,7 +277,7 @@ class LivestreamOriginalIE(InfoExtractor):
             'view_count': view_count,
         }
 
-    def _extract_video_formats(self, video_data, video_id, entry_protocol):
+    def _extract_video_formats(self, video_data, video_id):
         formats = []
 
         progressive_url = video_data.get('progressiveUrl')
@@ -289,7 +290,8 @@ class LivestreamOriginalIE(InfoExtractor):
         m3u8_url = video_data.get('httpUrl')
         if m3u8_url:
             formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False))
+                m3u8_url, video_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False))
 
         rtsp_url = video_data.get('rtspUrl')
         if rtsp_url:
@@ -340,11 +342,10 @@ class LivestreamOriginalIE(InfoExtractor):
                 }
             video_data = self._download_json(stream_url, content_id)
             is_live = video_data.get('isLive')
-            entry_protocol = 'm3u8' if is_live else 'm3u8_native'
             info.update({
                 'id': content_id,
                 'title': self._live_title(info['title']) if is_live else info['title'],
-                'formats': self._extract_video_formats(video_data, content_id, entry_protocol),
+                'formats': self._extract_video_formats(video_data, content_id),
                 'is_live': is_live,
             })
             return info
diff --git a/youtube_dl/extractor/medialaan.py b/youtube_dl/extractor/medialaan.py
new file mode 100644 (file)
index 0000000..6e06747
--- /dev/null
@@ -0,0 +1,259 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    parse_duration,
+    try_get,
+    unified_timestamp,
+    urlencode_postdata,
+)
+
+
+class MedialaanIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?
+                        (?:
+                            (?P<site_id>vtm|q2|vtmkzoom)\.be/
+                            (?:
+                                video(?:/[^/]+/id/|/?\?.*?\baid=)|
+                                (?:[^/]+/)*
+                            )
+                        )
+                        (?P<id>[^/?#&]+)
+                    '''
+    _NETRC_MACHINE = 'medialaan'
+    _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
+    _SITE_TO_APP_ID = {
+        'vtm': 'vtm_watch',
+        'q2': 'q2',
+        'vtmkzoom': 'vtmkzoom',
+    }
+    _TESTS = [{
+        # vod
+        'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
+        'info_dict': {
+            'id': 'vtm_20170219_VM0678361_vtmwatch',
+            'ext': 'mp4',
+            'title': 'Allemaal Chris afl. 6',
+            'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
+            'timestamp': 1487533280,
+            'upload_date': '20170219',
+            'duration': 2562,
+            'series': 'Allemaal Chris',
+            'season': 'Allemaal Chris',
+            'season_number': 1,
+            'season_id': '256936078124527',
+            'episode': 'Allemaal Chris afl. 6',
+            'episode_number': 6,
+            'episode_id': '256936078591527',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'Requires account credentials',
+    }, {
+        # clip
+        'url': 'http://vtm.be/video?aid=168332',
+        'info_dict': {
+            'id': '168332',
+            'ext': 'mp4',
+            'title': '"Veronique liegt!"',
+            'description': 'md5:1385e2b743923afe54ba4adc38476155',
+            'timestamp': 1489002029,
+            'upload_date': '20170308',
+            'duration': 96,
+        },
+    }, {
+        # vod
+        'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
+        'only_matching': True,
+    }, {
+        # vod
+        'url': 'http://vtm.be/video?aid=163157',
+        'only_matching': True,
+    }, {
+        # vod
+        'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
+        'only_matching': True,
+    }, {
+        # clip
+        'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
+        'only_matching': True,
+    }]
+
+    def _real_initialize(self):
+        self._logged_in = False
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            self.raise_login_required()
+
+        auth_data = {
+            'APIKey': self._APIKEY,
+            'sdk': 'js_6.1',
+            'format': 'json',
+            'loginID': username,
+            'password': password,
+        }
+
+        auth_info = self._download_json(
+            'https://accounts.eu1.gigya.com/accounts.login', None,
+            note='Logging in', errnote='Unable to log in',
+            data=urlencode_postdata(auth_data))
+
+        error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
+        if error_message:
+            raise ExtractorError(
+                'Unable to login: %s' % error_message, expected=True)
+
+        self._uid = auth_info['UID']
+        self._uid_signature = auth_info['UIDSignature']
+        self._signature_timestamp = auth_info['signatureTimestamp']
+
+        self._logged_in = True
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id, site_id = mobj.group('id', 'site_id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        config = self._parse_json(
+            self._search_regex(
+                r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
+                webpage, 'config', default='{}'), video_id,
+            transform_source=lambda s: s.replace(
+                '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
+
+        vod_id = config.get('vodId') or self._search_regex(
+            (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
+             r'<[^>]+id=["\']vod-(\d+)'),
+            webpage, 'video_id', default=None)
+
+        # clip, no authentication required
+        if not vod_id:
+            player = self._parse_json(
+                self._search_regex(
+                    r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
+                    default=''),
+                video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
+            if player:
+                video = player[-1]
+                info = {
+                    'id': video_id,
+                    'url': video['videoUrl'],
+                    'title': video['title'],
+                    'thumbnail': video.get('imageUrl'),
+                    'timestamp': int_or_none(video.get('createdDate')),
+                    'duration': int_or_none(video.get('duration')),
+                }
+            else:
+                info = self._parse_html5_media_entries(
+                    url, webpage, video_id, m3u8_id='hls')[0]
+                info.update({
+                    'id': video_id,
+                    'title': self._html_search_meta('description', webpage),
+                    'duration': parse_duration(self._html_search_meta('duration', webpage)),
+                })
+        # vod, authentication required
+        else:
+            if not self._logged_in:
+                self._login()
+
+            settings = self._parse_json(
+                self._search_regex(
+                    r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+                    webpage, 'drupal settings', default='{}'),
+                video_id)
+
+            def get(container, item):
+                return try_get(
+                    settings, lambda x: x[container][item],
+                    compat_str) or self._search_regex(
+                    r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
+                    default=None)
+
+            app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
+            sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
+
+            data = self._download_json(
+                'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
+                video_id, query={
+                    'app_id': app_id,
+                    'user_network': sso,
+                    'UID': self._uid,
+                    'UIDSignature': self._uid_signature,
+                    'signatureTimestamp': self._signature_timestamp,
+                })
+
+            formats = self._extract_m3u8_formats(
+                data['response']['uri'], video_id, entry_protocol='m3u8_native',
+                ext='mp4', m3u8_id='hls')
+
+            self._sort_formats(formats)
+
+            info = {
+                'id': vod_id,
+                'formats': formats,
+            }
+
+            api_key = get('vod', 'apiKey')
+            channel = get('medialaanGigya', 'channel')
+
+            if api_key:
+                videos = self._download_json(
+                    'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
+                    query={
+                        'channels': channel,
+                        'ids': vod_id,
+                        'limit': 1,
+                        'apikey': api_key,
+                    })
+                if videos:
+                    video = try_get(
+                        videos, lambda x: x['response']['videos'][0], dict)
+                    if video:
+                        def get(container, item, expected_type=None):
+                            return try_get(
+                                video, lambda x: x[container][item], expected_type)
+
+                        def get_string(container, item):
+                            return get(container, item, compat_str)
+
+                        info.update({
+                            'series': get_string('program', 'title'),
+                            'season': get_string('season', 'title'),
+                            'season_number': int_or_none(get('season', 'number')),
+                            'season_id': get_string('season', 'id'),
+                            'episode': get_string('episode', 'title'),
+                            'episode_number': int_or_none(get('episode', 'number')),
+                            'episode_id': get_string('episode', 'id'),
+                            'duration': int_or_none(
+                                video.get('duration')) or int_or_none(
+                                video.get('durationMillis'), scale=1000),
+                            'title': get_string('episode', 'title'),
+                            'description': get_string('episode', 'text'),
+                            'timestamp': unified_timestamp(get_string(
+                                'publication', 'begin')),
+                        })
+
+            if not info.get('title'):
+                info['title'] = try_get(
+                    config, lambda x: x['videoConfig']['title'],
+                    compat_str) or self._html_search_regex(
+                    r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
+                    default=None) or self._og_search_title(webpage)
+
+        if not info.get('description'):
+            info['description'] = self._html_search_regex(
+                r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
+                webpage, 'description', default=None)
+
+        return info
index ec1b4c4fea111ded48f530c7020dd9aabd38dbb8..40f72d66f25eb33d2fdb75c8a03e37d873b26afe 100644 (file)
@@ -51,6 +51,7 @@ class MioMioIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31',
         },
+        'skip': 'Unable to load videos',
     }]
 
     def _extract_mioplayer(self, webpage, video_id, title, http_headers):
@@ -94,9 +95,18 @@ class MioMioIE(InfoExtractor):
 
         return entries
 
+    def _download_chinese_webpage(self, *args, **kwargs):
+        # Requests with English locales return garbage
+        headers = {
+            'Accept-Language': 'zh-TW,en-US;q=0.7,en;q=0.3',
+        }
+        kwargs.setdefault('headers', {}).update(headers)
+        return self._download_webpage(*args, **kwargs)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_chinese_webpage(
+            url, video_id)
 
         title = self._html_search_meta(
             'description', webpage, 'title', fatal=True)
@@ -106,7 +116,7 @@ class MioMioIE(InfoExtractor):
 
         if '_h5' in mioplayer_path:
             player_url = compat_urlparse.urljoin(url, mioplayer_path)
-            player_webpage = self._download_webpage(
+            player_webpage = self._download_chinese_webpage(
                 player_url, video_id,
                 note='Downloading player webpage', headers={'Referer': url})
             entries = self._parse_html5_media_entries(player_url, player_webpage, video_id)
index 79e0b8ada1aaefeb90b479967d0f9e2197818bff..28b743cca1f2355a24cb6b913c7a7410f40d595f 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import uuid
 
 from .common import InfoExtractor
+from .ooyala import OoyalaIE
 from ..compat import (
     compat_str,
     compat_urllib_parse_urlencode,
@@ -24,6 +25,9 @@ class MiTeleBaseIE(InfoExtractor):
             r'(?s)(<ms-video-player.+?</ms-video-player>)',
             webpage, 'ms video player'))
         video_id = player_data['data-media-id']
+        if player_data.get('data-cms-id') == 'ooyala':
+            return self.url_result(
+                'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id)
         config_url = compat_urlparse.urljoin(url, player_data['data-config'])
         config = self._download_json(
             config_url, video_id, 'Downloading config JSON')
index d9943fc2c9eef739dc375c58c743bc5b8d829309..8961309fdabccb34697b3ef29e70294414cd5802 100644 (file)
@@ -34,12 +34,6 @@ class NineCNineMediaStackIE(NineCNineMediaBaseIE):
         formats.extend(self._extract_f4m_formats(
             stack_base_url + 'f4m', stack_id,
             f4m_id='hds', fatal=False))
-        mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False)
-        if mp4_url:
-            formats.append({
-                'url': mp4_url,
-                'format_id': 'mp4',
-            })
         self._sort_formats(formats)
 
         return {
index 50473d777dd2d1e3d238a7215a0b0a0f7b2ebef8..38fefe492b5f60f3952023d17c41721b7c6df412 100644 (file)
@@ -3,41 +3,27 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+)
 from ..utils import (
+    determine_ext,
+    ExtractorError,
     fix_xml_ampersands,
     orderedSet,
     parse_duration,
     qualities,
     strip_jsonp,
     unified_strdate,
-    ExtractorError,
 )
 
 
 class NPOBaseIE(InfoExtractor):
     def _get_token(self, video_id):
-        token_page = self._download_webpage(
-            'http://ida.omroep.nl/npoplayer/i.js',
-            video_id, note='Downloading token')
-        token = self._search_regex(
-            r'npoplayer\.token = "(.+?)"', token_page, 'token')
-        # Decryption algorithm extracted from http://npoplayer.omroep.nl/csjs/npoplayer-min.js
-        token_l = list(token)
-        first = second = None
-        for i in range(5, len(token_l) - 4):
-            if token_l[i].isdigit():
-                if first is None:
-                    first = i
-                elif second is None:
-                    second = i
-        if first is None or second is None:
-            first = 12
-            second = 13
-
-        token_l[first], token_l[second] = token_l[second], token_l[first]
-
-        return ''.join(token_l)
+        return self._download_json(
+            'http://ida.omroep.nl/app.php/auth', video_id,
+            note='Downloading token')['token']
 
 
 class NPOIE(NPOBaseIE):
@@ -58,103 +44,113 @@ class NPOIE(NPOBaseIE):
                         (?P<id>[^/?#]+)
                 '''
 
-    _TESTS = [
-        {
-            'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
-            'md5': '4b3f9c429157ec4775f2c9cb7b911016',
-            'info_dict': {
-                'id': 'VPWON_1220719',
-                'ext': 'm4v',
-                'title': 'Nieuwsuur',
-                'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
-                'upload_date': '20140622',
-            },
+    _TESTS = [{
+        'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
+        'md5': '4b3f9c429157ec4775f2c9cb7b911016',
+        'info_dict': {
+            'id': 'VPWON_1220719',
+            'ext': 'm4v',
+            'title': 'Nieuwsuur',
+            'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
+            'upload_date': '20140622',
         },
-        {
-            'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
-            'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
-            'info_dict': {
-                'id': 'VARA_101191800',
-                'ext': 'm4v',
-                'title': 'De Mega Mike & Mega Thomas show: The best of.',
-                'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
-                'upload_date': '20090227',
-                'duration': 2400,
-            },
+    }, {
+        'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
+        'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
+        'info_dict': {
+            'id': 'VARA_101191800',
+            'ext': 'm4v',
+            'title': 'De Mega Mike & Mega Thomas show: The best of.',
+            'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
+            'upload_date': '20090227',
+            'duration': 2400,
         },
-        {
-            'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
-            'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
-            'info_dict': {
-                'id': 'VPWON_1169289',
-                'ext': 'm4v',
-                'title': 'Tegenlicht: De toekomst komt uit Afrika',
-                'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
-                'upload_date': '20130225',
-                'duration': 3000,
-            },
+    }, {
+        'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
+        'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
+        'info_dict': {
+            'id': 'VPWON_1169289',
+            'ext': 'm4v',
+            'title': 'Tegenlicht: Zwart geld. De toekomst komt uit Afrika',
+            'description': 'md5:52cf4eefbc96fffcbdc06d024147abea',
+            'upload_date': '20130225',
+            'duration': 3000,
         },
-        {
-            'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
-            'info_dict': {
-                'id': 'WO_VPRO_043706',
-                'ext': 'wmv',
-                'title': 'De nieuwe mens - Deel 1',
-                'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
-                'duration': 4680,
-            },
-            'params': {
-                # mplayer mms download
-                'skip_download': True,
-            }
+    }, {
+        'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
+        'info_dict': {
+            'id': 'WO_VPRO_043706',
+            'ext': 'm4v',
+            'title': 'De nieuwe mens - Deel 1',
+            'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
+            'duration': 4680,
         },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
         # non asf in streams
-        {
-            'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
-            'md5': 'b3da13de374cbe2d5332a7e910bef97f',
-            'info_dict': {
-                'id': 'WO_NOS_762771',
-                'ext': 'mp4',
-                'title': 'Hoe gaat Europa verder na Parijs?',
-            },
-        },
-        {
-            'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
-            'md5': '01c6a2841675995da1f0cf776f03a9c3',
-            'info_dict': {
-                'id': 'VPWON_1233944',
-                'ext': 'm4v',
-                'title': 'Aap, poot, pies',
-                'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
-                'upload_date': '20150508',
-                'duration': 599,
-            },
+        'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
+        'info_dict': {
+            'id': 'WO_NOS_762771',
+            'ext': 'mp4',
+            'title': 'Hoe gaat Europa verder na Parijs?',
         },
-        {
-            'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
-            'md5': 'd30cd8417b8b9bca1fdff27428860d08',
-            'info_dict': {
-                'id': 'POW_00996502',
-                'ext': 'm4v',
-                'title': '''"Dit is wel een 'landslide'..."''',
-                'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
-                'upload_date': '20150508',
-                'duration': 462,
-            },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
+        'info_dict': {
+            'id': 'VPWON_1233944',
+            'ext': 'm4v',
+            'title': 'Aap, poot, pies',
+            'description': 'md5:c9c8005d1869ae65b858e82c01a91fde',
+            'upload_date': '20150508',
+            'duration': 599,
         },
-        {
-            'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547',
-            'only_matching': True,
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
+        'info_dict': {
+            'id': 'POW_00996502',
+            'ext': 'm4v',
+            'title': '''"Dit is wel een 'landslide'..."''',
+            'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
+            'upload_date': '20150508',
+            'duration': 462,
         },
-        {
-            'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118',
-            'only_matching': True,
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        # audio
+        'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437',
+        'info_dict': {
+            'id': 'RBX_FUNX_6683215',
+            'ext': 'mp3',
+            'title': 'Jouw Stad Rotterdam',
+            'description': 'md5:db251505244f097717ec59fabc372d9f',
         },
-        {
-            'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
-            'only_matching': True,
+        'params': {
+            'skip_download': True,
         }
-    ]
+    }, {
+        'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
+        'only_matching': True,
+    }, {
+        # live stream
+        'url': 'npo:LI_NL1_4188102',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -183,70 +179,115 @@ class NPOIE(NPOBaseIE):
         token = self._get_token(video_id)
 
         formats = []
+        urls = set()
+
+        quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
+        items = self._download_json(
+            'http://ida.omroep.nl/app.php/%s' % video_id, video_id,
+            'Downloading formats JSON', query={
+                'adaptive': 'yes',
+                'token': token,
+            })['items'][0]
+        for num, item in enumerate(items):
+            item_url = item.get('url')
+            if not item_url or item_url in urls:
+                continue
+            urls.add(item_url)
+            format_id = self._search_regex(
+                r'video/ida/([^/]+)', item_url, 'format id',
+                default=None)
+
+            def add_format_url(format_url):
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                    'quality': quality(format_id),
+                })
+
+            # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
+            if item.get('contentType') in ('url', 'audio'):
+                add_format_url(item_url)
+                continue
 
-        pubopties = metadata.get('pubopties')
-        if pubopties:
-            quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
-            for format_id in pubopties:
-                format_info = self._download_json(
-                    'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s'
-                    % (video_id, format_id, token),
-                    video_id, 'Downloading %s JSON' % format_id)
-                if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
+            try:
+                # '%' binds tighter than 'or', so the fallbacks need parentheses
+                stream_info = self._download_json(
+                    item_url + '&type=json', video_id, 'Downloading %s stream JSON'
+                    % (item.get('label') or item.get('format') or format_id or num))
+            except ExtractorError as ee:
+                if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+                    error = (self._parse_json(
+                        ee.cause.read().decode(), video_id,
+                        fatal=False) or {}).get('errorstring')
+                    if error:
+                        raise ExtractorError(error, expected=True)
+                raise
+            # Stream URL instead of JSON, example: npo:LI_NL1_4188102
+            if isinstance(stream_info, compat_str):
+                if not stream_info.startswith('http'):
                     continue
-                streams = format_info.get('streams')
-                if streams:
-                    try:
-                        video_info = self._download_json(
-                            streams[0] + '&type=json',
-                            video_id, 'Downloading %s stream JSON' % format_id)
-                    except ExtractorError as ee:
-                        if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
-                            error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring')
-                            if error:
-                                raise ExtractorError(error, expected=True)
-                        raise
-                else:
-                    video_info = format_info
-                video_url = video_info.get('url')
-                if not video_url:
+                video_url = stream_info
+            # JSON
+            else:
+                video_url = stream_info.get('url')
+            if not video_url or video_url in urls:
+                continue
+            urls.add(video_url)  # remember the resolved URL; item_url is already in the set
+            if determine_ext(video_url) == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, ext='mp4',
+                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+            else:
+                add_format_url(video_url)
+
+        is_live = metadata.get('medium') == 'live'
+
+        if not is_live:
+            for num, stream in enumerate(metadata.get('streams', [])):
+                stream_url = stream.get('url')
+                if not stream_url or stream_url in urls:
                     continue
-                if format_id == 'adaptive':
-                    formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
-                else:
+                urls.add(stream_url)
+                # smooth streaming is not supported
+                stream_type = stream.get('type', '').lower()
+                if stream_type in ['ss', 'ms']:
+                    continue
+                if stream_type == 'hds':
+                    f4m_formats = self._extract_f4m_formats(
+                        stream_url, video_id, fatal=False)
+                    # f4m downloader downloads only piece of live stream
+                    for f4m_format in f4m_formats:
+                        f4m_format['preference'] = -1
+                    formats.extend(f4m_formats)
+                elif stream_type == 'hls':
+                    formats.extend(self._extract_m3u8_formats(
+                        stream_url, video_id, ext='mp4', fatal=False))
+                # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706
+                elif '.asf' in stream_url:
+                    asx = self._download_xml(
+                        stream_url, video_id,
+                        'Downloading stream %d ASX playlist' % num,
+                        transform_source=fix_xml_ampersands, fatal=False)
+                    if not asx:
+                        continue
+                    ref = asx.find('./ENTRY/Ref')
+                    if ref is None:
+                        continue
+                    video_url = ref.get('href')
+                    if not video_url or video_url in urls:
+                        continue
+                    urls.add(video_url)
                     formats.append({
                         'url': video_url,
-                        'format_id': format_id,
-                        'quality': quality(format_id),
+                        'ext': stream.get('formaat', 'asf'),
+                        'quality': stream.get('kwaliteit'),
+                        'preference': -10,
                     })
-
-        streams = metadata.get('streams')
-        if streams:
-            for i, stream in enumerate(streams):
-                stream_url = stream.get('url')
-                if not stream_url:
-                    continue
-                if '.asf' not in stream_url:
+                else:
                     formats.append({
                         'url': stream_url,
                         'quality': stream.get('kwaliteit'),
                     })
-                    continue
-                asx = self._download_xml(
-                    stream_url, video_id,
-                    'Downloading stream %d ASX playlist' % i,
-                    transform_source=fix_xml_ampersands)
-                ref = asx.find('./ENTRY/Ref')
-                if ref is None:
-                    continue
-                video_url = ref.get('href')
-                if not video_url:
-                    continue
-                formats.append({
-                    'url': video_url,
-                    'ext': stream.get('formaat', 'asf'),
-                    'quality': stream.get('kwaliteit'),
-                })
 
         self._sort_formats(formats)
 
@@ -259,28 +300,28 @@ class NPOIE(NPOBaseIE):
 
         return {
             'id': video_id,
-            'title': title,
+            'title': self._live_title(title) if is_live else title,
             'description': metadata.get('info'),
             'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
             'upload_date': unified_strdate(metadata.get('gidsdatum')),
             'duration': parse_duration(metadata.get('tijdsduur')),
             'formats': formats,
             'subtitles': subtitles,
+            'is_live': is_live,
         }
 
 
 class NPOLiveIE(NPOBaseIE):
     IE_NAME = 'npo.nl:live'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>[^/?#&]+)'
 
     _TEST = {
         'url': 'http://www.npo.nl/live/npo-1',
         'info_dict': {
-            'id': 'LI_NEDERLAND1_136692',
+            'id': 'LI_NL1_4188102',
             'display_id': 'npo-1',
             'ext': 'mp4',
-            'title': 're:^Nederland 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-            'description': 'Livestream',
+            'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'is_live': True,
         },
         'params': {
@@ -296,58 +337,12 @@ class NPOLiveIE(NPOBaseIE):
         live_id = self._search_regex(
             r'data-prid="([^"]+)"', webpage, 'live id')
 
-        metadata = self._download_json(
-            'http://e.omroep.nl/metadata/%s' % live_id,
-            display_id, transform_source=strip_jsonp)
-
-        token = self._get_token(display_id)
-
-        formats = []
-
-        streams = metadata.get('streams')
-        if streams:
-            for stream in streams:
-                stream_type = stream.get('type').lower()
-                # smooth streaming is not supported
-                if stream_type in ['ss', 'ms']:
-                    continue
-                stream_info = self._download_json(
-                    'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
-                    % (stream.get('url'), token),
-                    display_id, 'Downloading %s JSON' % stream_type)
-                if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0):
-                    continue
-                stream_url = self._download_json(
-                    stream_info['stream'], display_id,
-                    'Downloading %s URL' % stream_type,
-                    'Unable to download %s URL' % stream_type,
-                    transform_source=strip_jsonp, fatal=False)
-                if not stream_url:
-                    continue
-                if stream_type == 'hds':
-                    f4m_formats = self._extract_f4m_formats(stream_url, display_id)
-                    # f4m downloader downloads only piece of live stream
-                    for f4m_format in f4m_formats:
-                        f4m_format['preference'] = -1
-                    formats.extend(f4m_formats)
-                elif stream_type == 'hls':
-                    formats.extend(self._extract_m3u8_formats(stream_url, display_id, 'mp4'))
-                else:
-                    formats.append({
-                        'url': stream_url,
-                        'preference': -10,
-                    })
-
-        self._sort_formats(formats)
-
         return {
+            '_type': 'url_transparent',
+            'url': 'npo:%s' % live_id,
+            'ie_key': NPOIE.ie_key(),
             'id': live_id,
             'display_id': display_id,
-            'title': self._live_title(metadata['titel']),
-            'description': metadata['info'],
-            'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
-            'formats': formats,
-            'is_live': True,
         }
 
 
index fc7ff43a62ba4a1a294d368d0b8fbaeb9abdb2ed..58ffde541451bbaf863a82a0bb0986d27a8fcf4f 100644 (file)
@@ -75,22 +75,51 @@ class OpenloadIE(InfoExtractor):
             '<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
             webpage, 'openload ID')
 
-        first_char = int(ol_id[0])
-        urlcode = []
-        num = 1
-
-        while num < len(ol_id):
-            i = ord(ol_id[num])
-            key = 0
-            if i <= 90:
-                key = i - 65
-            elif i >= 97:
-                key = 25 + i - 97
-            urlcode.append((key, compat_chr(int(ol_id[num + 2:num + 5]) // int(ol_id[num + 1]) - first_char)))
-            num += 5
-
-        video_url = 'https://openload.co/stream/' + ''.join(
-            [value for _, value in sorted(urlcode, key=lambda x: x[0])])
+        video_url_chars = []
+
+        first_char = ord(ol_id[0])
+        key = first_char - 55
+        maxKey = max(2, key)
+        key = min(maxKey, len(ol_id) - 38)
+        t = ol_id[key:key + 36]
+
+        hashMap = {}
+        v = ol_id.replace(t, '')
+        h = 0
+
+        while h < len(t):
+            f = t[h:h + 3]
+            i = int(f, 8)
+            hashMap[h // 3] = i  # floor division keeps the keys integral on Python 3 as well
+            h += 3
+
+        h = 0
+        H = 0
+        while h < len(v):
+            B = ''
+            C = ''
+            if len(v) >= h + 2:
+                B = v[h:h + 2]
+            if len(v) >= h + 3:
+                C = v[h:h + 3]
+            i = int(B, 16)
+            h += 2
+            if H % 3 == 0:
+                i = int(C, 8)
+                h += 1
+            elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60:
+                i = int(C, 10)
+                h += 1
+            index = H % 7
+
+            A = hashMap[index]
+            i ^= 213
+            i ^= A
+            video_url_chars.append(compat_chr(i))
+            H += 1
+
+        video_url = 'https://openload.co/stream/%s?mime=true'
+        video_url = video_url % (''.join(video_url_chars))
 
         title = self._og_search_title(webpage, default=None) or self._search_regex(
             r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
index e0cbd045e4ad68517b425f9565590ab7a75f9019..e45d9fe552e2c5af4c3f35182aa8063dcca9e390 100644 (file)
@@ -40,7 +40,7 @@ class PluralsightIE(PluralsightBaseIE):
         'info_dict': {
             'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
             'ext': 'mp4',
-            'title': 'Management of SQL Server - Demo Monitoring',
+            'title': 'Demo Monitoring',
             'duration': 338,
         },
         'skip': 'Requires pluralsight account credentials',
@@ -169,11 +169,10 @@ class PluralsightIE(PluralsightBaseIE):
 
         collection = course['modules']
 
-        module, clip = None, None
+        clip = None
 
         for module_ in collection:
             if name in (module_.get('moduleName'), module_.get('name')):
-                module = module_
                 for clip_ in module_.get('clips', []):
                     clip_index = clip_.get('clipIndex')
                     if clip_index is None:
@@ -187,7 +186,7 @@ class PluralsightIE(PluralsightBaseIE):
         if not clip:
             raise ExtractorError('Unable to resolve clip')
 
-        title = '%s - %s' % (module['title'], clip['title'])
+        title = clip['title']
 
         QUALITIES = {
             'low': {'width': 640, 'height': 480},
index 9b413590a4078b9e962edc63912a85fbc8312523..b25f1f193fc7b1590d57966e7c819d16a4b43cd0 100644 (file)
@@ -1,7 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import functools
 import itertools
+import operator
 # import os
 import re
 
@@ -18,6 +20,7 @@ from ..utils import (
     js_to_json,
     orderedSet,
     # sanitized_Request,
+    remove_quotes,
     str_to_int,
 )
 # from ..aes import (
@@ -129,9 +132,32 @@ class PornHubIE(InfoExtractor):
 
         tv_webpage = dl_webpage('tv')
 
-        video_url = self._search_regex(
-            r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
-            'video url', group='url')
+        assignments = self._search_regex(
+            r'(var.+?mediastring.+?)</script>', tv_webpage,
+            'encoded url').split(';')
+
+        js_vars = {}
+
+        def parse_js_value(inp):
+            inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
+            if '+' in inp:
+                inps = inp.split('+')
+                return functools.reduce(
+                    operator.concat, map(parse_js_value, inps))
+            inp = inp.strip()
+            if inp in js_vars:
+                return js_vars[inp]
+            return remove_quotes(inp)
+
+        for assn in assignments:
+            assn = assn.strip()
+            if not assn:
+                continue
+            assn = re.sub(r'var\s+', '', assn)
+            vname, value = assn.split('=', 1)
+            js_vars[vname.strip()] = parse_js_value(value)  # key must match the stripped lookups
+
+        video_url = js_vars['mediastring']
 
         title = self._search_regex(
             r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
index 1245309a7ebc1e5621ee4c566ef64f16676c6011..d8a4bd2443304e2a890dbdcc873cb74a257d6a51 100644 (file)
@@ -300,6 +300,21 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
                 'skip_download': True,
             },
         },
+        {
+            # title in <h2 class="subtitle">
+            'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
+            'info_dict': {
+                'id': '4895826',
+                'ext': 'mp4',
+                'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe',
+                'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9',
+                'upload_date': '20170302',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'skip': 'geo restricted to Germany',
+        },
         {
             # geo restricted to Germany
             'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
@@ -338,6 +353,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
         r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
         r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
         r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
+        r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
     ]
     _DESCRIPTION_REGEXES = [
         r'<p itemprop="description">\s*(.+?)</p>',
@@ -369,7 +385,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
     def _extract_clip(self, url, webpage):
         clip_id = self._html_search_regex(
             self._CLIPID_REGEXES, webpage, 'clip id')
-        title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
+        title = self._html_search_regex(
+            self._TITLE_REGEXES, webpage, 'title',
+            default=None) or self._og_search_title(webpage)
         info = self._extract_video_info(url, clip_id)
         description = self._html_search_regex(
             self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py
new file mode 100644 (file)
index 0000000..afab624
--- /dev/null
@@ -0,0 +1,122 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    try_get,
+    # unified_timestamp,
+    ExtractorError,
+)
+
+
+class RedBullTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film)/(?P<id>AP-\w+)'
+    _TESTS = [{
+        # film
+        'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
+        'md5': 'fb0445b98aa4394e504b413d98031d1f',
+        'info_dict': {
+            'id': 'AP-1Q756YYX51W11',
+            'ext': 'mp4',
+            'title': 'ABC of...WRC',
+            'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31',
+            'duration': 1582.04,
+            # 'timestamp': 1488405786,
+            # 'upload_date': '20170301',
+        },
+    }, {
+        # episode
+        'url': 'https://www.redbull.tv/video/AP-1PMT5JCWH1W11/grime?playlist=shows:shows-playall:web',
+        'info_dict': {
+            'id': 'AP-1PMT5JCWH1W11',
+            'ext': 'mp4',
+            'title': 'Grime - Hashtags S2 E4',
+            'description': 'md5:334b741c8c1ce65be057eab6773c1cf5',
+            'duration': 904.6,
+            # 'timestamp': 1487290093,
+            # 'upload_date': '20170217',
+            'series': 'Hashtags',
+            'season_number': 2,
+            'episode_number': 4,
+        },
+    }, {
+        'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        session = self._download_json(
+            'https://api-v2.redbull.tv/session', video_id,
+            note='Downloading access token', query={
+                'build': '4.370.0',
+                'category': 'personal_computer',
+                'os_version': '1.0',
+                'os_family': 'http',
+            })
+        if session.get('code') == 'error':
+            raise ExtractorError('%s said: %s' % (
+                self.IE_NAME, session['message']))
+        auth = '%s %s' % (session.get('token_type', 'Bearer'), session['access_token'])
+
+        try:
+            info = self._download_json(
+                'https://api-v2.redbull.tv/content/%s' % video_id,
+                video_id, note='Downloading video information',
+                headers={'Authorization': auth}
+            )
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                error_message = self._parse_json(
+                    e.cause.read().decode(), video_id)['message']
+                raise ExtractorError('%s said: %s' % (
+                    self.IE_NAME, error_message), expected=True)
+            raise
+
+        video = info['video_product']
+
+        title = info['title'].strip()
+
+        formats = self._extract_m3u8_formats(
+            video['url'], video_id, 'mp4', 'm3u8_native')
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for _, captions in (try_get(
+                video, lambda x: x['attachments']['captions'],
+                dict) or {}).items():
+            if not captions or not isinstance(captions, list):
+                continue
+            for caption in captions:
+                caption_url = caption.get('url')
+                if not caption_url:
+                    continue
+                ext = caption.get('format')
+                if ext == 'xml':
+                    ext = 'ttml'
+                subtitles.setdefault(caption.get('lang') or 'en', []).append({
+                    'url': caption_url,
+                    'ext': ext,
+                })
+
+        subheading = info.get('subheading')
+        if subheading:
+            title += ' - %s' % subheading
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': info.get('long_description') or info.get(
+                'short_description'),
+            'duration': float_or_none(video.get('duration'), scale=1000),
+            # 'timestamp': unified_timestamp(info.get('published')),
+            'series': info.get('show_title'),
+            'season_number': int_or_none(info.get('season_number')),
+            'episode_number': int_or_none(info.get('episode_number')),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
index fd1df925ba46bcecf87e192d2331da5e77d0b4bc..889fa76289dc75aa7e868ee2b7496ee69e2f0e72 100644 (file)
@@ -17,7 +17,7 @@ from ..utils import (
 class RutubeIE(InfoExtractor):
     IE_NAME = 'rutube'
     IE_DESC = 'Rutube videos'
-    _VALID_URL = r'https?://rutube\.ru/(?:video|play/embed)/(?P<id>[\da-z]{32})'
+    _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
 
     _TESTS = [{
         'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
@@ -39,8 +39,17 @@ class RutubeIE(InfoExtractor):
     }, {
         'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
         'only_matching': True,
+    }, {
+        'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
+        'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return [mobj.group('url') for mobj in re.finditer(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1',
+            webpage)]
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video = self._download_json(
index 20d01754a17998f90c64f33cf76693028dd57103..6c09df25a07de17dfb29ead61bd1464e72a1c36a 100644 (file)
@@ -82,6 +82,9 @@ class RuutuIE(InfoExtractor):
                         formats.extend(self._extract_f4m_formats(
                             video_url, video_id, f4m_id='hds', fatal=False))
                     elif ext == 'mpd':
+                        # video-only and audio-only streams are of different
+                        # duration resulting in out of sync issue
+                        continue
                         formats.extend(self._extract_mpd_formats(
                             video_url, video_id, mpd_id='dash', fatal=False))
                     else:
index 387a4f7f6952adcb6d1954106ce580f44cde6e6f..db5ef8b57bba44449cef02bd17eb9dc363c56b15 100644 (file)
@@ -89,7 +89,7 @@ class SenateISVPIE(InfoExtractor):
     @staticmethod
     def _search_iframe_url(webpage):
         mobj = re.search(
-            r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
+            r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]",
             webpage)
         if mobj:
             return mobj.group('url')
index b3aa4ce26ab95933b40f3606c86b8ae6cefc531b..0ee4a8ff8988de543e08e8a06d78dac2b27b018a 100644 (file)
@@ -121,7 +121,7 @@ class SoundcloudIE(InfoExtractor):
         },
     ]
 
-    _CLIENT_ID = 'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA'
+    _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z'
     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
 
     @staticmethod
index e973c867c1a23eeeacbbc706269e50900f4f60b9..9f5c237efc9283a9d59667b843268c31769ba11c 100644 (file)
@@ -65,7 +65,7 @@ class StreamableIE(InfoExtractor):
         # to return video info like the title properly sometimes, and doesn't
         # include info like the video duration
         video = self._download_json(
-            'https://streamable.com/ajax/videos/%s' % video_id, video_id)
+            'https://ajax.streamable.com/videos/%s' % video_id, video_id)
 
         # Format IDs:
         # 0 The video is being uploaded
index d5abfc9e44ec82b492fcd98d9e4429b40c5c05b9..fdcc7d5731b4833065912dd89e0132f385c817f3 100644 (file)
@@ -44,6 +44,10 @@ class TelecincoIE(MiTeleBaseIE):
     }, {
         'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
         'only_matching': True,
+    }, {
+        # ooyala video
+        'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 82d73c31d55c55dfb7cdf40f593265806f3fdb89..fafaa826fe91e2eaddc2fe9b700c7e0367797807 100644 (file)
@@ -2,15 +2,17 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     int_or_none,
     smuggle_url,
+    try_get,
 )
 
 
 class TeleQuebecIE(InfoExtractor):
     _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york',
         'md5': 'fe95a0957e5707b1b01f5013e725c90f',
         'info_dict': {
@@ -18,10 +20,14 @@ class TeleQuebecIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Le couronnement de New York',
             'description': 'md5:f5b3d27a689ec6c1486132b2d687d432',
-            'upload_date': '20160220',
-            'timestamp': 1455965438,
+            'upload_date': '20170201',
+            'timestamp': 1485972222,
         }
-    }
+    }, {
+        # no description
+        'url': 'http://zonevideo.telequebec.tv/media/30261',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         media_id = self._match_id(url)
@@ -31,9 +37,13 @@ class TeleQuebecIE(InfoExtractor):
         return {
             '_type': 'url_transparent',
             'id': media_id,
-            'url': smuggle_url('limelight:media:' + media_data['streamInfo']['sourceId'], {'geo_countries': ['CA']}),
+            'url': smuggle_url(
+                'limelight:media:' + media_data['streamInfo']['sourceId'],
+                {'geo_countries': ['CA']}),
             'title': media_data['title'],
-            'description': media_data.get('descriptions', [{'text': None}])[0].get('text'),
-            'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000),
+            'description': try_get(
+                media_data, lambda x: x['descriptions'][0]['text'], compat_str),
+            'duration': int_or_none(
+                media_data.get('durationInMilliseconds'), 1000),
             'ie_key': 'LimelightMedia',
         }
diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py
deleted file mode 100644 (file)
index fd145ba..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from .brightcove import BrightcoveLegacyIE
-from ..compat import (
-    compat_parse_qs,
-    compat_urlparse,
-)
-
-
-class TlcDeIE(InfoExtractor):
-    IE_NAME = 'tlc.de'
-    _VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?'
-
-    _TEST = {
-        'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
-        'info_dict': {
-            'id': '3235167922001',
-            'ext': 'mp4',
-            'title': 'Breaking Amish: Die Welt da draußen',
-            'description': (
-                'Vier Amische und eine Mennonitin wagen in New York'
-                '  den Sprung in ein komplett anderes Leben. Begleitet sie auf'
-                ' ihrem spannenden Weg.'),
-            'timestamp': 1396598084,
-            'upload_date': '20140404',
-            'uploader_id': '1659832546',
-        },
-    }
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        brightcove_id = mobj.group('id')
-        if not brightcove_id:
-            title = mobj.group('title')
-            webpage = self._download_webpage(url, title)
-            brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
-            brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0]
-        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/toongoggles.py b/youtube_dl/extractor/toongoggles.py
new file mode 100644 (file)
index 0000000..b5ba1c0
--- /dev/null
@@ -0,0 +1,81 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+)
+
+
+class ToonGogglesIE(InfoExtractor):
+    """Extractor for toongoggles.com episodes and whole shows (as playlists)."""
+    _VALID_URL = r'https?://(?:www\.)?toongoggles\.com/shows/(?P<show_id>\d+)(?:/[^/]+/episodes/(?P<episode_id>\d+))?'
+    _TESTS = [{
+        'url': 'http://www.toongoggles.com/shows/217143/bernard-season-2/episodes/217147/football',
+        'md5': '18289fc2b951eff6b953a9d8f01e6831',
+        'info_dict': {
+            'id': '217147',
+            'ext': 'mp4',
+            'title': 'Football',
+            'uploader_id': '1',
+            'description': 'Bernard decides to play football in order to be better than Lloyd and tries to beat him no matter how, he even cheats.',
+            'upload_date': '20160718',
+            'timestamp': 1468879330,
+        }
+    }, {
+        'url': 'http://www.toongoggles.com/shows/227759/om-nom-stories-around-the-world',
+        'info_dict': {
+            'id': '227759',
+            'title': 'Om Nom Stories Around The World',
+        },
+        'playlist_mincount': 11,
+    }]
+
+    def _call_api(self, action, page_id, query):
+        """Download JSON from API endpoint `action` with `query` plus fixed parameters."""
+        params = dict(query)  # copy so the caller's dict is never mutated
+        params.update({
+            'for_ng': 1, 'for_web': 1, 'show_meta': 1, 'version': 7.0})
+        return self._download_json('http://api.toongoggles.com/' + action, page_id, query=params)
+
+    def _parse_episode_data(self, episode_data):
+        """Build a url_transparent Kaltura result from one API episode object."""
+        title = episode_data['episode_name']
+
+        return {
+            '_type': 'url_transparent',
+            'id': episode_data['episode_id'],
+            'title': title,
+            'url': 'kaltura:513551:' + episode_data['entry_id'],
+            'thumbnail': episode_data.get('thumbnail_url'),
+            'description': episode_data.get('description'),
+            'duration': parse_duration(episode_data.get('hms')),
+            'series': episode_data.get('show_name'),
+            'season_number': int_or_none(episode_data.get('season_num')),
+            'episode_id': episode_data.get('episode_id'),
+            'episode': title,
+            'episode_number': int_or_none(episode_data.get('episode_num')),
+            'categories': episode_data.get('categories'),
+            'ie_key': 'Kaltura',
+        }
+
+    def _real_extract(self, url):
+        """Return one episode result, or a playlist of a show's episodes."""
+        show_id, episode_id = re.match(self._VALID_URL, url).groups()
+        if episode_id:
+            episode_data = self._call_api('search', episode_id, {
+                'filter': 'episode',
+                'id': episode_id,
+            })['objects'][0]
+            return self._parse_episode_data(episode_data)
+        show_data = self._call_api('getepisodesbyshow', show_id, {
+            'max': 1000000000,
+            'showid': show_id,
+        })
+        entries = [
+            self._parse_episode_data(episode_data)
+            for episode_data in show_data.get('objects') or []]
+        return self.playlist_result(entries, show_id, show_data.get('show_name'))
diff --git a/youtube_dl/extractor/tunepk.py b/youtube_dl/extractor/tunepk.py
new file mode 100644 (file)
index 0000000..9d42651
--- /dev/null
@@ -0,0 +1,90 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    try_get,
+    unified_timestamp,
+)
+
+
+class TunePkIE(InfoExtractor):
+    # '.' and '?' in the embed player path are escaped so they match literally
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?tune\.pk/(?:video/|player/embed_player\.php\?.*?\bvid=)|
+                            embed\.tune\.pk/play/
+                        )
+                        (?P<id>\d+)
+                    '''
+    _TESTS = [{
+        'url': 'https://tune.pk/video/6919541/maudie-2017-international-trailer-1-ft-ethan-hawke-sally-hawkins',
+        'md5': '0c537163b7f6f97da3c5dd1e3ef6dd55',
+        'info_dict': {
+            'id': '6919541',
+            'ext': 'mp4',
+            'title': 'Maudie (2017) | International Trailer # 1 ft Ethan Hawke, Sally Hawkins',
+            'description': 'md5:eb5a04114fafef5cec90799a93a2d09c',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1487327564,
+            'upload_date': '20170217',
+            'uploader': 'Movie Trailers',
+            'duration': 107,
+            'view_count': int,
+        }
+    }, {
+        'url': 'https://tune.pk/player/embed_player.php?vid=6919541&folder=2017/02/17/&width=600&height=350&autoplay=no',
+        'only_matching': True,
+    }, {
+        'url': 'https://embed.tune.pk/play/6919541?autoplay=no&ssl=yes&inline=true',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Extract metadata and formats from https://tune.pk/video/<id>."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            'https://tune.pk/video/%s' % video_id, video_id)
+
+        details = self._parse_json(
+            self._search_regex(
+                r'new\s+TunePlayer\(({.+?})\)\s*;\s*\n', webpage, 'tune player'),
+            video_id)['details']
+
+        video = details['video']
+        title = video.get('title') or self._og_search_title(
+            webpage, default=None) or self._html_search_meta(
+            'title', webpage, 'title', fatal=True)
+
+        formats = self._parse_jwplayer_formats(
+            details['player']['sources'], video_id)
+        self._sort_formats(formats)
+
+        description = self._og_search_description(
+            webpage, default=None) or self._html_search_meta(
+            'description', webpage, 'description')
+
+        thumbnail = video.get('thumb') or self._og_search_thumbnail(
+            webpage, default=None) or self._html_search_meta(
+            'thumbnail', webpage, 'thumbnail')
+
+        timestamp = unified_timestamp(video.get('date_added'))
+        uploader = try_get(
+            video, lambda x: x['uploader']['name'],
+            compat_str) or self._html_search_meta('author', webpage, 'author')
+        duration = int_or_none(video.get('duration'))
+        view_count = int_or_none(video.get('views'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'duration': duration,
+            'view_count': view_count,
+            'formats': formats,
+        }
index f3541b6540c2b772afbc2d89d097f75ac531a2b3..7af11659f36e9fb2eb570bcee4d897597e3af6da 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     parse_iso8601,
@@ -12,7 +14,7 @@ from ..utils import (
 
 class TwentyFourVideoIE(InfoExtractor):
     IE_NAME = '24video'
-    _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex|tube)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.24video.net/video/view/1044982',
@@ -43,10 +45,12 @@ class TwentyFourVideoIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        host = mobj.group('host')
 
         webpage = self._download_webpage(
-            'http://www.24video.sex/video/view/%s' % video_id, video_id)
+            'http://%s/video/view/%s' % (host, video_id), video_id)
 
         title = self._og_search_title(webpage)
         description = self._html_search_regex(
@@ -72,11 +76,11 @@ class TwentyFourVideoIE(InfoExtractor):
 
         # Sets some cookies
         self._download_xml(
-            r'http://www.24video.sex/video/xml/%s?mode=init' % video_id,
+            r'http://%s/video/xml/%s?mode=init' % (host, video_id),
             video_id, 'Downloading init XML')
 
         video_xml = self._download_xml(
-            'http://www.24video.sex/video/xml/%s?mode=play' % video_id,
+            'http://%s/video/xml/%s?mode=play' % (host, video_id),
             video_id, 'Downloading video XML')
 
         video = xpath_element(video_xml, './/video', 'video', fatal=True)
index bbba394b0ede953f60c179970c8b41d4013f69c2..2daf9dfac923260d94e26d56d6234ae9e3c6ec58 100644 (file)
@@ -12,7 +12,6 @@ from ..compat import (
     compat_str,
     compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
-    compat_urlparse,
 )
 from ..utils import (
     clean_html,
@@ -24,6 +23,7 @@ from ..utils import (
     parse_iso8601,
     update_url_query,
     urlencode_postdata,
+    urljoin,
 )
 
 
@@ -32,7 +32,7 @@ class TwitchBaseIE(InfoExtractor):
 
     _API_BASE = 'https://api.twitch.tv'
     _USHER_BASE = 'https://usher.ttvnw.net'
-    _LOGIN_URL = 'http://www.twitch.tv/login'
+    _LOGIN_URL = 'https://www.twitch.tv/login'
     _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6'
     _NETRC_MACHINE = 'twitch'
 
@@ -64,6 +64,35 @@ class TwitchBaseIE(InfoExtractor):
             raise ExtractorError(
                 'Unable to login. Twitch said: %s' % message, expected=True)
 
+        def login_step(page, urlh, note, data):
+            form = self._hidden_inputs(page)
+            form.update(data)
+
+            page_url = urlh.geturl()
+            post_url = self._search_regex(
+                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
+                'post url', default=page_url, group='url')
+            post_url = urljoin(page_url, post_url)
+
+            headers = {'Referer': page_url}
+
+            try:
+                response = self._download_json(
+                    post_url, None, note,
+                    data=urlencode_postdata(form),
+                    headers=headers)
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+                    response = self._parse_json(
+                        e.cause.read().decode('utf-8'), None)
+                    fail(response['message'])
+                raise
+
+            redirect_url = urljoin(post_url, response['redirect'])
+            return self._download_webpage_handle(
+                redirect_url, None, 'Downloading login redirect page',
+                headers=headers)
+
         login_page, handle = self._download_webpage_handle(
             self._LOGIN_URL, None, 'Downloading login page')
 
@@ -71,40 +100,19 @@ class TwitchBaseIE(InfoExtractor):
         if 'blacklist_message' in login_page:
             fail(clean_html(login_page))
 
-        login_form = self._hidden_inputs(login_page)
-
-        login_form.update({
-            'username': username,
-            'password': password,
-        })
-
-        redirect_url = handle.geturl()
-
-        post_url = self._search_regex(
-            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
-            'post url', default=redirect_url, group='url')
-
-        if not post_url.startswith('http'):
-            post_url = compat_urlparse.urljoin(redirect_url, post_url)
-
-        headers = {'Referer': redirect_url}
+        redirect_page, handle = login_step(
+            login_page, handle, 'Logging in as %s' % username, {
+                'username': username,
+                'password': password,
+            })
 
-        try:
-            response = self._download_json(
-                post_url, None, 'Logging in as %s' % username,
-                data=urlencode_postdata(login_form),
-                headers=headers)
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
-                response = self._parse_json(
-                    e.cause.read().decode('utf-8'), None)
-                fail(response['message'])
-            raise
-
-        if response.get('redirect'):
-            self._download_webpage(
-                response['redirect'], None, 'Downloading login redirect page',
-                headers=headers)
+        if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
+            # TODO: Add mechanism to request an SMS or phone call
+            tfa_token = self._get_tfa_info('two-factor authentication token')
+            login_step(redirect_page, handle, 'Submitting TFA token', {
+                'authy_token': tfa_token,
+                'remember_2fa': 'true',
+            })
 
     def _prefer_source(self, formats):
         try:
index d26fb49b3939728e8a962b2ad3131c71fd223366..5086f591e56b6b6a9266e96b3c26541963757120 100644 (file)
@@ -9,7 +9,7 @@ from .common import InfoExtractor
 
 class VierIE(InfoExtractor):
     IE_NAME = 'vier'
-    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
     _TESTS = [{
         'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
         'info_dict': {
@@ -23,6 +23,19 @@ class VierIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         },
+    }, {
+        'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614',
+        'info_dict': {
+            'id': '2561614',
+            'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
+            'ext': 'mp4',
+            'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s',
+            'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
         'only_matching': True,
@@ -35,6 +48,7 @@ class VierIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         embed_id = mobj.group('embed_id')
         display_id = mobj.group('display_id') or embed_id
+        site = mobj.group('site')
 
         webpage = self._download_webpage(url, display_id)
 
@@ -43,7 +57,7 @@ class VierIE(InfoExtractor):
             webpage, 'video id')
         application = self._search_regex(
             [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
-            webpage, 'application', default='vier_vod')
+            webpage, 'application', default=site + '_vod')
         filename = self._search_regex(
             [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
             webpage, 'filename')
@@ -68,13 +82,19 @@ class VierIE(InfoExtractor):
 
 class VierVideosIE(InfoExtractor):
     IE_NAME = 'vier:videos'
-    _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
     _TESTS = [{
         'url': 'http://www.vier.be/demoestuin/videos',
         'info_dict': {
             'id': 'demoestuin',
         },
         'playlist_mincount': 153,
+    }, {
+        'url': 'http://www.vijf.be/temptationisland/videos',
+        'info_dict': {
+            'id': 'temptationisland',
+        },
+        'playlist_mincount': 159,
     }, {
         'url': 'http://www.vier.be/demoestuin/videos?page=6',
         'info_dict': {
@@ -92,6 +112,7 @@ class VierVideosIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         program = mobj.group('program')
+        site = mobj.group('site')
 
         page_id = mobj.group('page')
         if page_id:
@@ -105,13 +126,13 @@ class VierVideosIE(InfoExtractor):
         entries = []
         for current_page_id in itertools.count(start_page):
             current_page = self._download_webpage(
-                'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
+                'http://www.%s.be/%s/videos?page=%d' % (site, program, current_page_id),
                 program,
                 'Downloading page %d' % (current_page_id + 1))
             page_entries = [
-                self.url_result('http://www.vier.be' + video_url, 'Vier')
+                self.url_result('http://www.' + site + '.be' + video_url, 'Vier')
                 for video_url in re.findall(
-                    r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
+                    r'<h[23]><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
             entries.extend(page_entries)
             if page_id or '>Meer<' not in current_page:
                 break
index 3fd889c8e95cca15b16cf9d022ac61813433d7c4..db6a65d2ed93b233b050561c56fa7b6133ddb1b7 100644 (file)
@@ -44,7 +44,7 @@ class ViuBaseIE(InfoExtractor):
 
 
 class ViuIE(ViuBaseIE):
-    _VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
+    _VALID_URL = r'(?:viu:|https?://[^/]+\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
         'info_dict': {
@@ -69,6 +69,9 @@ class ViuIE(ViuBaseIE):
             'skip_download': 'm3u8 download',
         },
         'skip': 'Geo-restricted to Indonesia',
+    }, {
+        'url': 'https://india.viu.com/en/media/1126286865',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 7c42a4f54864eeb370c6a4583daae950262b45b8..dc2719cf987981999a31487966508e589c86f64b 100644 (file)
@@ -432,8 +432,7 @@ class VKIE(VKBaseIE):
                 })
             elif format_id == 'hls':
                 formats.extend(self._extract_m3u8_formats(
-                    format_url, video_id, 'mp4',
-                    entry_protocol='m3u8' if is_live else 'm3u8_native',
+                    format_url, video_id, 'mp4', 'm3u8_native',
                     m3u8_id=format_id, fatal=False, live=is_live))
             elif format_id == 'rtmp':
                 formats.append({
diff --git a/youtube_dl/extractor/vrak.py b/youtube_dl/extractor/vrak.py
new file mode 100644 (file)
index 0000000..daa247c
--- /dev/null
@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
+from ..utils import (
+    int_or_none,
+    parse_age_limit,
+    smuggle_url,
+    unescapeHTML,
+)
+
+
+class VrakIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?\btarget=(?P<id>[\d.]+)'
+    _TEST = {
+        'url': 'http://www.vrak.tv/videos?target=1.2306782&filtre=emission&id=1.1806721',
+        'info_dict': {
+            'id': '5345661243001',
+            'ext': 'mp4',
+            'title': 'Obésité, film de hockey et Roseline Filion',
+            'timestamp': 1488492126,
+            'upload_date': '20170302',
+            'uploader_id': '2890187628001',
+            'creator': 'VRAK.TV',
+            'age_limit': 8,
+            'series': 'ALT (Actualité Légèrement Tordue)',
+            'episode': 'Obésité, film de hockey et Roseline Filion',
+            'tags': list,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2890187628001/default_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        """Return a url_transparent result pointing at the page's Brightcove video."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            r'<h\d\b[^>]+\bclass=["\']videoTitle["\'][^>]*>([^<]+)',
+            webpage, 'title', default=None)
+        if not title:
+            title = self._og_search_title(webpage)
+
+        options_json = self._search_regex(
+            r'data-player-options-content=(["\'])(?P<content>{.+?})\1',
+            webpage, 'content', default='{}', group='content')
+        options = self._parse_json(options_json, video_id, transform_source=unescapeHTML)
+        ref_id = options.get('refId')
+        if not ref_id:
+            ref_id = self._search_regex(
+                r'refId&quot;:&quot;([^&]+)&quot;', webpage, 'ref id')
+
+        brightcove_id = self._search_regex(
+            r'''(?x)
+                java\.lang\.String\s+value\s*=\s*["']brightcove\.article\.\d+\.%s
+                [^>]*
+                java\.lang\.String\s+value\s*=\s*["'](\d+)
+            ''' % re.escape(ref_id), webpage, 'brightcove id')
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': BrightcoveNewIE.ie_key(),
+            'url': smuggle_url(
+                self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['CA']}),
+            'id': brightcove_id,
+            'description': options.get('description'),
+            'creator': options.get('brand'),
+            'age_limit': parse_age_limit(options.get('rating')),
+            'series': options.get('showName') or options.get('episodeName'),  # this is intentional
+            'season_number': int_or_none(options.get('seasonNumber')),
+            'episode': title,
+            'episode_number': int_or_none(options.get('episodeNumber')),
+            'tags': options.get('tags', []),
+        }
index f7e6360a33e8b6d2cc3096232bfa1d2c458ab3c7..8bb7362bbc1bec46dcef081bb08a02728d4af3e0 100644 (file)
@@ -19,9 +19,10 @@ class WDRBaseIE(InfoExtractor):
     def _extract_wdr_video(self, webpage, display_id):
         # for wdr.de the data-extension is in a tag with the class "mediaLink"
         # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
-        # for wdrmaus its in a link to the page in a multiline "videoLink"-tag
+        # for wdrmaus, in a tag with the class "videoButton" (previously a link
+        # to the page in a multiline "videoLink"-tag)
         json_metadata = self._html_search_regex(
-            r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
+            r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
             webpage, 'media link', default=None, flags=re.MULTILINE)
 
         if not json_metadata:
@@ -32,7 +33,7 @@ class WDRBaseIE(InfoExtractor):
         jsonp_url = media_link_obj['mediaObj']['url']
 
         metadata = self._download_json(
-            jsonp_url, 'metadata', transform_source=strip_jsonp)
+            jsonp_url, display_id, transform_source=strip_jsonp)
 
         metadata_tracker_data = metadata['trackerData']
         metadata_media_resource = metadata['mediaResource']
@@ -161,23 +162,23 @@ class WDRIE(WDRBaseIE):
         {
             'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
             'info_dict': {
-                'id': 'mdb-1096487',
-                'ext': 'flv',
+                'id': 'mdb-1323501',
+                'ext': 'mp4',
                 'upload_date': 're:^[0-9]{8}$',
                 'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
-                'description': '- Die Sendung mit der Maus -',
+                'description': 'Die Seite mit der Maus -',
             },
             'skip': 'The id changes from week to week because of the new episode'
         },
         {
-            'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5',
+            'url': 'http://www.wdrmaus.de/filme/sachgeschichten/achterbahn.php5',
             'md5': '803138901f6368ee497b4d195bb164f2',
             'info_dict': {
                 'id': 'mdb-186083',
                 'ext': 'mp4',
                 'upload_date': '20130919',
                 'title': 'Sachgeschichte - Achterbahn ',
-                'description': '- Die Sendung mit der Maus -',
+                'description': 'Die Seite mit der Maus -',
             },
         },
         {
@@ -186,7 +187,7 @@ class WDRIE(WDRBaseIE):
             'info_dict': {
                 'id': 'mdb-869971',
                 'ext': 'flv',
-                'title': 'Funkhaus Europa Livestream',
+                'title': 'COSMO Livestream',
                 'description': 'md5:2309992a6716c347891c045be50992e4',
                 'upload_date': '20160101',
             },
index 81c7939215bcfccb60b11b2ff28336f0f8d163c5..ca40de522bc5e341f2ac269db997a0e73914c127 100644 (file)
@@ -59,6 +59,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     # If True it will raise an error if no login info is provided
     _LOGIN_REQUIRED = False
 
+    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
+
     def _set_language(self):
         self._set_cookie(
             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
@@ -265,9 +267,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          )
                      )?                                                       # all until now is optional -> you can pass the naked ID
                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
-                     (?!.*?\blist=)                                            # combined list/video URLs are handled by the playlist IE
+                     (?!.*?\blist=
+                        (?:
+                            %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
+                            WL                                                # WL are handled by the watch later IE
+                        )
+                     )
                      (?(1).+)?                                                # if we found the ID, everything can follow
-                     $"""
+                     $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
     _formats = {
         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
@@ -924,6 +931,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'url': 'sJL6WA-aGkQ',
             'only_matching': True,
         },
+        {
+            'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
+            'only_matching': True,
+        },
     ]
 
     def __init__(self, *args, **kwargs):
@@ -1454,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         # Check for "rental" videos
         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
-            raise ExtractorError('"rental" videos not supported')
+            raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
 
         # Start extracting information
         self.report_information_extraction(video_id)
@@ -1864,8 +1875,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                         )
                         .*
                      |
-                        ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,})
-                     )"""
+                        (%(playlist_id)s)
+                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
     IE_NAME = 'youtube:playlist'
index 8b51d3c6f4cd1ecc3bf0c57b6ed42e83a1b30613..6b811535f474dd6f5a1d6724c04730f69b7a16cc 100644 (file)
@@ -773,7 +773,7 @@ def parseOpts(overrideArguments=None):
         help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
     postproc.add_option(
         '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
-        help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default; No effect without -x')
+        help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
     postproc.add_option(
         '--audio-quality', metavar='QUALITY',
         dest='audioquality', default='5',
index 96ddb3b36f2b9219af7edfe209b09ca3f34ae546..7c162d92abb43f9cf522eeeb52bb9675a06a24f7 100644 (file)
@@ -26,15 +26,25 @@ from ..utils import (
 
 
 EXT_TO_OUT_FORMATS = {
-    "aac": "adts",
-    "m4a": "ipod",
-    "mka": "matroska",
-    "mkv": "matroska",
-    "mpg": "mpeg",
-    "ogv": "ogg",
-    "ts": "mpegts",
-    "wma": "asf",
-    "wmv": "asf",
+    'aac': 'adts',
+    'flac': 'flac',
+    'm4a': 'ipod',
+    'mka': 'matroska',
+    'mkv': 'matroska',
+    'mpg': 'mpeg',
+    'ogv': 'ogg',
+    'ts': 'mpegts',
+    'wma': 'asf',
+    'wmv': 'asf',
+}
+ACODECS = {
+    'mp3': 'libmp3lame',
+    'aac': 'aac',
+    'flac': 'flac',
+    'm4a': 'aac',
+    'opus': 'opus',
+    'vorbis': 'libvorbis',
+    'wav': None,
 }
 
 
@@ -237,7 +247,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
                 acodec = 'copy'
                 extension = 'm4a'
                 more_opts = ['-bsf:a', 'aac_adtstoasc']
-            elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
+            elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
                 # Lossless if possible
                 acodec = 'copy'
                 extension = filecodec
@@ -256,8 +266,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
                     else:
                         more_opts += ['-b:a', self._preferredquality + 'k']
         else:
-            # We convert the audio (lossy)
-            acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
+            # We convert the audio (lossy if codec is lossy)
+            acodec = ACODECS[self._preferredcodec]
             extension = self._preferredcodec
             more_opts = []
             if self._preferredquality is not None:
index 807183f4a72a767b9b30b4878c15934f300def76..2340bc306b69652d43246472e7fd3f5e819e2af2 100644 (file)
@@ -39,6 +39,7 @@ from .compat import (
     compat_basestring,
     compat_chr,
     compat_etree_fromstring,
+    compat_expanduser,
     compat_html_entities,
     compat_html_entities_html5,
     compat_http_client,
@@ -473,7 +474,8 @@ def timeconvert(timestr):
 def sanitize_filename(s, restricted=False, is_id=False):
     """Sanitizes a string so it could be used as part of a filename.
     If restricted is set, use a stricter subset of allowed characters.
-    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
+    Set is_id if this is not an arbitrary string, but an ID that should be kept
+    if possible.
     """
     def replace_insane(char):
         if restricted and char in ACCENT_CHARS:
@@ -538,6 +540,11 @@ def sanitized_Request(url, *args, **kwargs):
     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
 
 
+def expand_path(s):
+    """Expand ~ (via compat_expanduser), then environment variables, in path s."""
+    return os.path.expandvars(compat_expanduser(s))
+
+
 def orderedSet(iterable):
     """ Remove all duplicates from the input iterable """
     res = []
@@ -1747,11 +1754,16 @@ def base_url(url):
 
 
 def urljoin(base, path):
+    if isinstance(path, bytes):
+        path = path.decode('utf-8')
     if not isinstance(path, compat_str) or not path:
         return None
     if re.match(r'^(?:https?:)?//', path):
         return path
-    if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base):
+    if isinstance(base, bytes):
+        base = base.decode('utf-8')
+    if not isinstance(base, compat_str) or not re.match(
+            r'^(?:https?:)?//', base):
         return None
     return compat_urlparse.urljoin(base, path)
 
index 261218b803e488ead7b09542b496ac596c6062d4..13904c724945e01c3d2ee7c70f20467a719fb721 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.02.27'
+__version__ = '2017.03.24'