Merge remote-tracking branch 'duncankl/airmozilla'
authorPhilipp Hagemeister <phihag@phihag.de>
Thu, 26 Feb 2015 00:15:08 +0000 (01:15 +0100)
committerPhilipp Hagemeister <phihag@phihag.de>
Thu, 26 Feb 2015 00:15:08 +0000 (01:15 +0100)
51 files changed:
AUTHORS
Makefile
README.md
docs/supportedsites.md
test/parameters.json
test/test_YoutubeDL.py
test/test_subtitles.py
test/test_utils.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/atresplayer.py
youtube_dl/extractor/bbccouk.py
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/bloomberg.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/chirbit.py [new file with mode: 0644]
youtube_dl/extractor/common.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/drtv.py
youtube_dl/extractor/eporner.py
youtube_dl/extractor/escapist.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/gdcvault.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/laola1tv.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/mit.py
youtube_dl/extractor/mitele.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/r7.py [new file with mode: 0644]
youtube_dl/extractor/rai.py
youtube_dl/extractor/rtlnow.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/soundgasm.py
youtube_dl/extractor/subtitles.py [deleted file]
youtube_dl/extractor/ted.py
youtube_dl/extractor/telecinco.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/viki.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/walla.py
youtube_dl/extractor/youtube.py
youtube_dl/options.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/AUTHORS b/AUTHORS
index 47f12a9eefbf2fb0c050c38f605b2bed8170c772..bdd2a15dcf910938857ed3d3fb1161c3fd72b72e 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -111,3 +111,4 @@ Paul Hartmann
 Frans de Jonge
 Robin de Rooij
 Ryan Schmidt
+Leslie P. Polzer
index 573c826850241be4557a4bbb0b43538242db6704..7087329564aa17db726d22fb3ce1fded54778635 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -43,7 +43,7 @@ test:
 ot: offlinetest
 
 offlinetest: codetest
-       nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
+       nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py
 
 tar: youtube-dl.tar.gz
 
index 8ea31d6059f05ac179f3879622af26cfc37ac110..2c53e22115eb7caaab770e876b606c004e527aca 100644 (file)
--- a/README.md
+++ b/README.md
@@ -139,6 +139,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      dislike_count <? 50 & description" .
     --no-playlist                    If the URL refers to a video and a
                                      playlist, download only the video.
+    --yes-playlist                   If the URL refers to a video and a
+                                     playlist, download the playlist.
     --age-limit YEARS                download only videos suitable for the given
                                      age
     --download-archive FILE          Download only videos not listed in the
@@ -351,8 +353,8 @@ which means you can modify it, redistribute it or use it however you like.
     --all-subs                       downloads all the available subtitles of
                                      the video
     --list-subs                      lists all available subtitles for the video
-    --sub-format FORMAT              subtitle format (default=srt) ([sbv/vtt]
-                                     youtube only)
+    --sub-format FORMAT              subtitle format, accepts formats
+                                     preference, for example: "ass/srt/best"
     --sub-lang LANGS                 languages of the subtitles to download
                                      (optional) separated by commas, use IETF
                                      language tags like 'en,pt'
index 5fe3e47cd869a1bde22dc8854931f4a6a5a13700..9f70db80ac39c6eaac088d951eca4382e01946c2 100644 (file)
@@ -72,6 +72,8 @@
  - **CeskaTelevize**
  - **channel9**: Channel 9
  - **Chilloutzone**
+ - **chirbit**
+ - **chirbit:profile**
  - **Cinchcast**
  - **Cinemassacre**
  - **clipfish**
  - **prosiebensat1**: ProSiebenSat.1 Digital
  - **Pyvideo**
  - **QuickVid**
+ - **R7**
  - **radio.de**
  - **radiobremen**
  - **radiofrance**
  - **soundcloud:playlist**
  - **soundcloud:set**
  - **soundcloud:user**
- - **Soundgasm**
+ - **soundgasm**
+ - **soundgasm:profile**
  - **southpark.cc.com**
  - **southpark.de**
  - **Space**
index af77b89b46a22cb29d5f71c7ea0d0e49660e5135..cbff9bd16486fcda2c155e6978c354e320cfc95b 100644 (file)
@@ -28,7 +28,7 @@
     "retries": 10, 
     "simulate": false, 
     "subtitleslang": null, 
-    "subtitlesformat": "srt",
+    "subtitlesformat": "best",
     "test": true, 
     "updatetime": true, 
     "usenetrc": false, 
index b1cd6a69f3ab0b909c1b328d83395aebee60ff44..055e4255583d500805facc4fc59e296170e876e4 100644 (file)
@@ -337,6 +337,65 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'G')
 
+    def test_subtitles(self):
+        def s_formats(lang, autocaption=False):
+            return [{
+                'ext': ext,
+                'url': 'http://localhost/video.%s.%s' % (lang, ext),
+                '_auto': autocaption,
+            } for ext in ['vtt', 'srt', 'ass']]
+        subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
+        auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
+        info_dict = {
+            'id': 'test',
+            'title': 'Test',
+            'url': 'http://localhost/video.mp4',
+            'subtitles': subtitles,
+            'automatic_captions': auto_captions,
+            'extractor': 'TEST',
+        }
+
+        def get_info(params={}):
+            params.setdefault('simulate', True)
+            ydl = YDL(params)
+            ydl.report_warning = lambda *args, **kargs: None
+            return ydl.process_video_result(info_dict, download=False)
+
+        result = get_info()
+        self.assertFalse(result.get('requested_subtitles'))
+        self.assertEqual(result['subtitles'], subtitles)
+        self.assertEqual(result['automatic_captions'], auto_captions)
+
+        result = get_info({'writesubtitles': True})
+        subs = result['requested_subtitles']
+        self.assertTrue(subs)
+        self.assertEqual(set(subs.keys()), set(['en']))
+        self.assertTrue(subs['en'].get('data') is None)
+        self.assertEqual(subs['en']['ext'], 'ass')
+
+        result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
+        subs = result['requested_subtitles']
+        self.assertEqual(subs['en']['ext'], 'srt')
+
+        result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
+        subs = result['requested_subtitles']
+        self.assertTrue(subs)
+        self.assertEqual(set(subs.keys()), set(['es', 'fr']))
+
+        result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+        subs = result['requested_subtitles']
+        self.assertTrue(subs)
+        self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+        self.assertFalse(subs['es']['_auto'])
+        self.assertTrue(subs['pt']['_auto'])
+
+        result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+        subs = result['requested_subtitles']
+        self.assertTrue(subs)
+        self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+        self.assertTrue(subs['es']['_auto'])
+        self.assertTrue(subs['pt']['_auto'])
+
     def test_add_extra_info(self):
         test_dict = {
             'extractor': 'Foo',
index bcc69a7783c214bfa2e50deb28be8278291e5312..3f2d8a2ba74e6b4f04d4159a64deb1f69f9d105b 100644 (file)
@@ -18,6 +18,14 @@ from youtube_dl.extractor import (
     VimeoIE,
     WallaIE,
     CeskaTelevizeIE,
+    LyndaIE,
+    NPOIE,
+    ComedyCentralIE,
+    NRKTVIE,
+    RaiIE,
+    VikiIE,
+    ThePlatformIE,
+    RTVEALaCartaIE,
 )
 
 
@@ -27,42 +35,38 @@ class BaseTestSubtitles(unittest.TestCase):
 
     def setUp(self):
         self.DL = FakeYDL()
-        self.ie = self.IE(self.DL)
+        self.ie = self.IE()
+        self.DL.add_info_extractor(self.ie)
 
     def getInfoDict(self):
-        info_dict = self.ie.extract(self.url)
+        info_dict = self.DL.extract_info(self.url, download=False)
         return info_dict
 
     def getSubtitles(self):
         info_dict = self.getInfoDict()
-        return info_dict['subtitles']
+        subtitles = info_dict['requested_subtitles']
+        if not subtitles:
+            return subtitles
+        for sub_info in subtitles.values():
+            if sub_info.get('data') is None:
+                uf = self.DL.urlopen(sub_info['url'])
+                sub_info['data'] = uf.read().decode('utf-8')
+        return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
 
 
 class TestYoutubeSubtitles(BaseTestSubtitles):
     url = 'QRS8MkLhQmM'
     IE = YoutubeIE
 
-    def test_youtube_no_writesubtitles(self):
-        self.DL.params['writesubtitles'] = False
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-
-    def test_youtube_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
-
-    def test_youtube_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['it']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
-
     def test_youtube_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 13)
+        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
+        for lang in ['it', 'fr', 'de']:
+            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
 
     def test_youtube_subtitles_sbv_format(self):
         self.DL.params['writesubtitles'] = True
@@ -76,12 +80,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
 
-    def test_youtube_list_subtitles(self):
-        self.DL.expect_warning('Video doesn\'t have automatic captions')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
     def test_youtube_automatic_captions(self):
         self.url = '8YoUxe5ncPo'
         self.DL.params['writeautomaticsub'] = True
@@ -103,55 +101,22 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
-
-    def test_youtube_multiple_langs(self):
-        self.url = 'QRS8MkLhQmM'
-        self.DL.params['writesubtitles'] = True
-        langs = ['it', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
-            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+        self.assertFalse(subtitles)
 
 
 class TestDailymotionSubtitles(BaseTestSubtitles):
     url = 'http://www.dailymotion.com/video/xczg00'
     IE = DailymotionIE
 
-    def test_no_writesubtitles(self):
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-
-    def test_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
-
-    def test_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['fr']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
-
     def test_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertTrue(len(subtitles.keys()) >= 6)
-
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
-    def test_automatic_captions(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslang'] = ['en']
-        subtitles = self.getSubtitles()
-        self.assertTrue(len(subtitles.keys()) == 0)
+        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
+        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
+        for lang in ['es', 'fr', 'de']:
+            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
 
     def test_nosubtitles(self):
         self.DL.expect_warning('video doesn\'t have subtitles')
@@ -159,61 +124,21 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
-
-    def test_multiple_langs(self):
-        self.DL.params['writesubtitles'] = True
-        langs = ['es', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
-            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+        self.assertFalse(subtitles)
 
 
 class TestTedSubtitles(BaseTestSubtitles):
     url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
     IE = TEDIE
 
-    def test_no_writesubtitles(self):
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-
-    def test_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
-
-    def test_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['fr']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
-
     def test_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertTrue(len(subtitles.keys()) >= 28)
-
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
-    def test_automatic_captions(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslang'] = ['en']
-        subtitles = self.getSubtitles()
-        self.assertTrue(len(subtitles.keys()) == 0)
-
-    def test_multiple_langs(self):
-        self.DL.params['writesubtitles'] = True
-        langs = ['es', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
+        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
+        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
+        for lang in ['es', 'fr', 'de']:
             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
 
 
@@ -221,14 +146,7 @@ class TestBlipTVSubtitles(BaseTestSubtitles):
     url = 'http://blip.tv/a/a-6603250'
     IE = BlipTVIE
 
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
     def test_allsubtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
@@ -240,39 +158,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
     url = 'http://vimeo.com/76979871'
     IE = VimeoIE
 
-    def test_no_writesubtitles(self):
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-
-    def test_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
-
-    def test_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['fr']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
-
     def test_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
-
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
-    def test_automatic_captions(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslang'] = ['en']
-        subtitles = self.getSubtitles()
-        self.assertTrue(len(subtitles.keys()) == 0)
+        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
+        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
 
     def test_nosubtitles(self):
         self.DL.expect_warning('video doesn\'t have subtitles')
@@ -280,27 +172,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
-
-    def test_multiple_langs(self):
-        self.DL.params['writesubtitles'] = True
-        langs = ['es', 'fr', 'de']
-        self.DL.params['subtitleslangs'] = langs
-        subtitles = self.getSubtitles()
-        for lang in langs:
-            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+        self.assertFalse(subtitles)
 
 
 class TestWallaSubtitles(BaseTestSubtitles):
     url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
     IE = WallaIE
 
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
     def test_allsubtitles(self):
         self.DL.expect_warning('Automatic Captions not supported by this server')
         self.DL.params['writesubtitles'] = True
@@ -315,19 +193,13 @@ class TestWallaSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
+        self.assertFalse(subtitles)
 
 
 class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
     url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
     IE = CeskaTelevizeIE
 
-    def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
     def test_allsubtitles(self):
         self.DL.expect_warning('Automatic Captions not supported by this server')
         self.DL.params['writesubtitles'] = True
@@ -342,7 +214,110 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles), 0)
+        self.assertFalse(subtitles)
+
+
+class TestLyndaSubtitles(BaseTestSubtitles):
+    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
+    IE = LyndaIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
+
+
+class TestNPOSubtitles(BaseTestSubtitles):
+    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
+    IE = NPOIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['nl']))
+        self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
+
+
+class TestMTVSubtitles(BaseTestSubtitles):
+    url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
+    IE = ComedyCentralIE
+
+    def getInfoDict(self):
+        return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
+
+
+class TestNRKSubtitles(BaseTestSubtitles):
+    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
+    IE = NRKTVIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['no']))
+        self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a')
+
+
+class TestRaiSubtitles(BaseTestSubtitles):
+    url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
+    IE = RaiIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['it']))
+        self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
+
+
+class TestVikiSubtitles(BaseTestSubtitles):
+    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
+    IE = VikiIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
+
+
+class TestThePlatformSubtitles(BaseTestSubtitles):
+    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
+    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
+    url = 'theplatform:JFUjUE1_ehvq'
+    IE = ThePlatformIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
+
+
+class TestRtveSubtitles(BaseTestSubtitles):
+    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
+    IE = RTVEALaCartaIE
+
+    def test_allsubtitles(self):
+        print('Skipping, only available from Spain')
+        return
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['es']))
+        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
 
 
 if __name__ == '__main__':
index c7373af1e2f64b8bc0bf0003d162b4e4b697eb87..2f8996d7bb5088e4c38470974b41e268fb260377 100644 (file)
@@ -85,6 +85,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(
             sanitize_filename('New World record at 0:12:34'),
             'New World record at 0_12_34')
+        self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+        self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
 
         forbidden = '"\0\\/'
         for fc in forbidden:
index ca7c3f5c6cc1e2fdb0669f2bfb22146510ff7865..76fc394bcff44f30ae6fa383ea54621a654a0864 100755 (executable)
@@ -154,7 +154,7 @@ class YoutubeDL(object):
     allsubtitles:      Downloads all the subtitles of the video
                        (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
-    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
+    subtitlesformat:   The format code for subtitles
     subtitleslangs:    List of languages of the subtitles to download
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
@@ -1008,6 +1008,15 @@ class YoutubeDL(object):
                 info_dict['timestamp'])
             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 
+        if self.params.get('listsubtitles', False):
+            if 'automatic_captions' in info_dict:
+                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+            return
+        info_dict['requested_subtitles'] = self.process_subtitles(
+            info_dict['id'], info_dict.get('subtitles'),
+            info_dict.get('automatic_captions'))
+
         # This extractors handle format selection themselves
         if info_dict['extractor'] in ['Youku']:
             if download:
@@ -1136,6 +1145,55 @@ class YoutubeDL(object):
         info_dict.update(formats_to_download[-1])
         return info_dict
 
+    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+        """Select the requested subtitles and their format"""
+        available_subs = {}
+        if normal_subtitles and self.params.get('writesubtitles'):
+            available_subs.update(normal_subtitles)
+        if automatic_captions and self.params.get('writeautomaticsub'):
+            for lang, cap_info in automatic_captions.items():
+                if lang not in available_subs:
+                    available_subs[lang] = cap_info
+
+        if (not self.params.get('writesubtitles') and not
+                self.params.get('writeautomaticsub') or not
+                available_subs):
+            return None
+
+        if self.params.get('allsubtitles', False):
+            requested_langs = available_subs.keys()
+        else:
+            if self.params.get('subtitleslangs', False):
+                requested_langs = self.params.get('subtitleslangs')
+            elif 'en' in available_subs:
+                requested_langs = ['en']
+            else:
+                requested_langs = [list(available_subs.keys())[0]]
+
+        formats_query = self.params.get('subtitlesformat', 'best')
+        formats_preference = formats_query.split('/') if formats_query else []
+        subs = {}
+        for lang in requested_langs:
+            formats = available_subs.get(lang)
+            if formats is None:
+                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+                continue
+            for ext in formats_preference:
+                if ext == 'best':
+                    f = formats[-1]
+                    break
+                matches = list(filter(lambda f: f['ext'] == ext, formats))
+                if matches:
+                    f = matches[-1]
+                    break
+            else:
+                f = formats[-1]
+                self.report_warning(
+                    'No subtitle format found matching "%s" for language %s, '
+                    'using %s' % (formats_query, lang, f['ext']))
+            subs[lang] = f
+        return subs
+
     def process_info(self, info_dict):
         """Process a single resolved IE result."""
 
@@ -1238,15 +1296,22 @@ class YoutubeDL(object):
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                        self.params.get('writeautomaticsub')])
 
-        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+        if subtitles_are_requested and info_dict.get('requested_subtitles'):
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
-            subtitles = info_dict['subtitles']
-            sub_format = self.params.get('subtitlesformat', 'srt')
-            for sub_lang in subtitles.keys():
-                sub = subtitles[sub_lang]
-                if sub is None:
-                    continue
+            subtitles = info_dict['requested_subtitles']
+            for sub_lang, sub_info in subtitles.items():
+                sub_format = sub_info['ext']
+                if sub_info.get('data') is not None:
+                    sub_data = sub_info['data']
+                else:
+                    try:
+                        uf = self.urlopen(sub_info['url'])
+                        sub_data = uf.read().decode('utf-8')
+                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                        self.report_warning('Unable to download subtitle for "%s": %s' %
+                                            (sub_lang, compat_str(err)))
+                        continue
                 try:
                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
@@ -1254,7 +1319,7 @@ class YoutubeDL(object):
                     else:
                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                            subfile.write(sub)
+                            subfile.write(sub_data)
                 except (OSError, IOError):
                     self.report_error('Cannot write subtitles file ' + sub_filename)
                     return
@@ -1564,6 +1629,17 @@ class YoutubeDL(object):
             ['ID', 'width', 'height', 'URL'],
             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
 
+    def list_subtitles(self, video_id, subtitles, name='subtitles'):
+        if not subtitles:
+            self.to_screen('%s has no %s' % (video_id, name))
+            return
+        self.to_screen(
+            'Available %s for %s:' % (name, video_id))
+        self.to_screen(render_table(
+            ['Language', 'formats'],
+            [[lang, ', '.join(f['ext'] for f in reversed(formats))]
+                for lang, formats in subtitles.items()]))
+
     def urlopen(self, req):
         """ Start an HTTP download """
 
index 25ab3fdfeeb880ceb787184f097df101fbc439f9..5ce20180098faf91adf598fac07e1c4553f3c746 100644 (file)
@@ -226,7 +226,6 @@ def _real_main(argv=None):
     if opts.embedsubtitles:
         postprocessors.append({
             'key': 'FFmpegEmbedSubtitle',
-            'subtitlesformat': opts.subtitlesformat,
         })
     if opts.xattrs:
         postprocessors.append({'key': 'XAttrMetadata'})
index 7e18643159e279ceba5accef252534c6fa876ec0..67dca82aea8b19d65f211c9f9979d416b9286ff1 100644 (file)
@@ -64,6 +64,10 @@ from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
+from .chirbit import (
+    ChirbitIE,
+    ChirbitProfileIE,
+)
 from .cinchcast import CinchcastIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
@@ -365,6 +369,7 @@ from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .pyvideo import PyvideoIE
 from .quickvid import QuickVidIE
+from .r7 import R7IE
 from .radiode import RadioDeIE
 from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
@@ -425,7 +430,10 @@ from .soundcloud import (
     SoundcloudUserIE,
     SoundcloudPlaylistIE
 )
-from .soundgasm import SoundgasmIE
+from .soundgasm import (
+    SoundgasmIE,
+    SoundgasmProfileIE
+)
 from .southpark import (
     SouthParkIE,
     SouthparkDeIE,
index 43e82847ff8eb472bdac0a91c2138220f001b3b6..576f03b5b71115771555e1d8d46f4a108eb9de93 100644 (file)
@@ -11,8 +11,8 @@ from ..utils import (
 
 
 class AppleTrailersIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    _TESTS = [{
         "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
         'info_dict': {
             'id': 'manofsteel',
@@ -63,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):
                 },
             },
         ]
-    }
+    }, {
+        'url': 'http://trailers.apple.com/ca/metropole/autrui/',
+        'only_matching': True,
+    }]
 
     _JSON_RE = r'iTunes.playURL\((.*?)\);'
 
index f016368fa8d0890de874a774b2a4a18db60a01c6..7669e0e3dc643b3bcf8d39663efcf6cba4540b04 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import time
 import hmac
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_urllib_parse,
@@ -17,7 +17,7 @@ from ..utils import (
 )
 
 
-class AtresPlayerIE(SubtitlesInfoExtractor):
+class AtresPlayerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
     _TESTS = [
         {
@@ -144,13 +144,12 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
         thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
 
         subtitles = {}
-        subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
-        if subtitle:
-            subtitles['es'] = subtitle
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+        subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
+        if subtitle_url:
+            subtitles['es'] = [{
+                'ext': 'srt',
+                'url': subtitle_url,
+            }]
 
         return {
             'id': video_id,
@@ -159,5 +158,5 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
-            'subtitles': self.extract_subtitles(video_id, subtitles),
+            'subtitles': subtitles,
         }
index f23e3954519546b91307189b1e14076ca1c4abe9..abc34a5761487b5a900294dac59db4a053b95cb0 100644 (file)
@@ -2,12 +2,12 @@ from __future__ import unicode_literals
 
 import xml.etree.ElementTree
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..utils import ExtractorError
 from ..compat import compat_HTTPError
 
 
-class BBCCoUkIE(SubtitlesInfoExtractor):
+class BBCCoUkIE(InfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
@@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             formats.extend(conn_formats)
         return formats
 
-    def _extract_captions(self, media, programme_id):
+    def _get_subtitles(self, media, programme_id):
         subtitles = {}
         for connection in self._extract_connections(media):
             captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
             lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
             ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
             srt = ''
+
+            def _extract_text(p):
+                if p.text is not None:
+                    stripped_text = p.text.strip()
+                    if stripped_text:
+                        return stripped_text
+                return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
             for pos, p in enumerate(ps):
-                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
-                                                          p.text.strip() if p.text is not None else '')
-            subtitles[lang] = srt
+                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
+            subtitles[lang] = [
+                {
+                    'url': connection.get('href'),
+                    'ext': 'ttml',
+                },
+                {
+                    'data': srt,
+                    'ext': 'srt',
+                },
+            ]
         return subtitles
 
     def _download_media_selector(self, programme_id):
@@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             elif kind == 'video':
                 formats.extend(self._extract_video(media, programme_id))
             elif kind == 'captions':
-                subtitles = self._extract_captions(media, programme_id)
+                subtitles = self.extract_subtitles(media, programme_id)
 
         return formats, subtitles
 
@@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
         else:
             programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
 
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(programme_id, subtitles)
-            return
-
         self._sort_formats(formats)
 
         return {
index 436cc515563a07d853ced1b8373461a752ed6038..8c7ba4b910bcc78e5e3fa02d7168a9e4f443bf65 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
 
 from ..compat import (
     compat_str,
@@ -18,7 +17,7 @@ from ..utils import (
 )
 
 
-class BlipTVIE(SubtitlesInfoExtractor):
+class BlipTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
 
     _TESTS = [
@@ -143,7 +142,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
         categories = [category.text for category in item.findall('category')]
 
         formats = []
-        subtitles = {}
+        subtitles_urls = {}
 
         media_group = item.find(media('group'))
         for media_content in media_group.findall(media('content')):
@@ -161,7 +160,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
                 }
                 lang = role.rpartition('-')[-1].strip().lower()
                 langcode = LANGS.get(lang, lang)
-                subtitles[langcode] = url
+                subtitles_urls[langcode] = url
             elif media_type.startswith('video/'):
                 formats.append({
                     'url': real_url,
@@ -175,11 +174,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
                 })
         self._sort_formats(formats)
 
-        # subtitles
-        video_subtitles = self.extract_subtitles(video_id, subtitles)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+        subtitles = self.extract_subtitles(video_id, subtitles_urls)
 
         return {
             'id': video_id,
@@ -192,15 +187,22 @@ class BlipTVIE(SubtitlesInfoExtractor):
             'thumbnail': thumbnail,
             'categories': categories,
             'formats': formats,
-            'subtitles': video_subtitles,
+            'subtitles': subtitles,
         }
 
-    def _download_subtitle_url(self, sub_lang, url):
-        # For some weird reason, blip.tv serves a video instead of subtitles
-        # when we request with a common UA
-        req = compat_urllib_request.Request(url)
-        req.add_header('User-Agent', 'youtube-dl')
-        return self._download_webpage(req, None, note=False)
+    def _get_subtitles(self, video_id, subtitles_urls):
+        subtitles = {}
+        for lang, url in subtitles_urls.items():
+            # For some weird reason, blip.tv serves a video instead of subtitles
+            # when we request with a common UA
+            req = compat_urllib_request.Request(url)
+            req.add_header('User-Agent', 'youtube-dl')
+            subtitles[lang] = [{
+                # The extension is 'srt' but it's actually an 'ass' file
+                'ext': 'ass',
+                'data': self._download_webpage(req, None, note=False),
+            }]
+        return subtitles
 
 
 class BlipTVUserIE(InfoExtractor):
index c51a97ce4327cff934216927948587131dedfa80..4a88ccd13caf604f3ea892c6784d603434fb06ee 100644 (file)
@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 
 class BloombergIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
+    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
 
     _TEST = {
         'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
@@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
+        name = self._match_id(url)
         webpage = self._download_webpage(url, name)
+
         f4m_url = self._search_regex(
             r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
             'f4m url')
index f70e090bb5b01942713149493e48bc0e51f7f74b..65f6be62313dfc623cf1f9aa7adc52282872aade 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
@@ -15,7 +15,7 @@ from ..utils import (
 )
 
 
-class CeskaTelevizeIE(SubtitlesInfoExtractor):
+class CeskaTelevizeIE(InfoExtractor):
     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
 
     _TESTS = [
@@ -107,13 +107,7 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
         subtitles = {}
         subs = item.get('subtitles')
         if subs:
-            subtitles['cs'] = subs[0]['url']
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
-        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
+            subtitles = self.extract_subtitles(episode_id, subs)
 
         return {
             'id': episode_id,
@@ -125,11 +119,20 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
             'subtitles': subtitles,
         }
 
+    def _get_subtitles(self, episode_id, subs):
+        original_subtitles = self._download_webpage(
+            subs[0]['url'], episode_id, 'Downloading subtitles')
+        srt_subs = self._fix_subtitles(original_subtitles)
+        return {
+            'cs': [{
+                'ext': 'srt',
+                'data': srt_subs,
+            }]
+        }
+
     @staticmethod
     def _fix_subtitles(subtitles):
         """ Convert millisecond-based subtitles to SRT """
-        if subtitles is None:
-            return subtitles  # subtitles not requested
 
         def _msectotimecode(msec):
             """ Helper utility to convert milliseconds to timecode """
@@ -149,7 +152,4 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
                 else:
                     yield line
 
-        fixed_subtitles = {}
-        for k, v in subtitles.items():
-            fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
-        return fixed_subtitles
+        return "\r\n".join(_fix_subtitle(subtitles))
diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py
new file mode 100644 (file)
index 0000000..b1eeaf1
--- /dev/null
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    int_or_none,
+)
+
+
+class ChirbitIE(InfoExtractor):
+    IE_NAME = 'chirbit'
+    _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
+    _TESTS = [{
+        'url': 'http://chirb.it/PrIPv5',
+        'md5': '9847b0dad6ac3e074568bf2cfb197de8',
+        'info_dict': {
+            'id': 'PrIPv5',
+            'ext': 'mp3',
+            'title': 'Фасадстрой',
+            'duration': 52,
+            'view_count': int,
+            'comment_count': int,
+        }
+    }, {
+        'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://chirb.it/%s' % audio_id, audio_id)
+
+        audio_url = self._search_regex(
+            r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
+
+        title = self._search_regex(
+            r'itemprop="name">([^<]+)', webpage, 'title')
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._search_regex(
+            r'itemprop="playCount"\s*>(\d+)', webpage,
+            'listen count', fatal=False))
+        comment_count = int_or_none(self._search_regex(
+            r'>(\d+) Comments?:', webpage,
+            'comment count', fatal=False))
+
+        return {
+            'id': audio_id,
+            'url': audio_url,
+            'title': title,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count,
+        }
+
+
+class ChirbitProfileIE(InfoExtractor):
+    IE_NAME = 'chirbit:profile'
+    _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://chirbit.com/ScarletBeauty',
+        'info_dict': {
+            'id': 'ScarletBeauty',
+            'title': 'Chirbits by ScarletBeauty',
+        },
+        'playlist_mincount': 3,
+    }
+
+    def _real_extract(self, url):
+        profile_id = self._match_id(url)
+
+        rss = self._download_xml(
+            'http://chirbit.com/rss/%s' % profile_id, profile_id)
+
+        entries = [
+            self.url_result(audio_url.text, 'Chirbit')
+            for audio_url in rss.findall('./channel/item/link')]
+
+        title = rss.find('./channel/title').text
+
+        return self.playlist_result(entries, profile_id, title)
index d3f86cf4a0f82a393dbd7c50ccdc9d747bf08460..87fce9cd89425150baff91577199f706db2a1e81 100644 (file)
@@ -150,8 +150,14 @@ class InfoExtractor(object):
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
     location:       Physical location where the video was filmed.
-    subtitles:      The subtitle file contents as a dictionary in the format
-                    {language: subtitles}.
+    subtitles:      The available subtitles as a dictionary in the format
+                    {language: subformats}. "subformats" is a list sorted from
+                    lower to higher preference, each element is a dictionary
+                    with the "ext" entry and one of:
+                        * "data": The subtitles file contents
+                        * "url": A url pointing to the subtitles file
+    automatic_captions: Like 'subtitles', used by the YoutubeIE for
+                    automatically generated captions
     duration:       Length of the video in seconds, as an integer.
     view_count:     How many users have watched the video on the platform.
     like_count:     Number of positive ratings of the video
@@ -833,7 +839,7 @@ class InfoExtractor(object):
             'url': m3u8_url,
             'ext': ext,
             'protocol': 'm3u8',
-            'preference': -1,
+            'preference': preference - 1 if preference else -1,
             'resolution': 'multiple',
             'format_note': 'Quality selection URL',
         }]
@@ -1011,6 +1017,24 @@ class InfoExtractor(object):
             any_restricted = any_restricted or is_restricted
         return not any_restricted
 
+    def extract_subtitles(self, *args, **kwargs):
+        if (self._downloader.params.get('writesubtitles', False) or
+                self._downloader.params.get('listsubtitles')):
+            return self._get_subtitles(*args, **kwargs)
+        return {}
+
+    def _get_subtitles(self, *args, **kwargs):
+        raise NotImplementedError("This method must be implemented by subclasses")
+
+    def extract_automatic_captions(self, *args, **kwargs):
+        if (self._downloader.params.get('writeautomaticsub', False) or
+                self._downloader.params.get('listsubtitles')):
+            return self._get_automatic_captions(*args, **kwargs)
+        return {}
+
+    def _get_automatic_captions(self, *args, **kwargs):
+        raise NotImplementedError("This method must be implemented by subclasses")
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
index 1680f532f80167a65c2dbdc3b5bc0bfa83f7fc66..f1da7d09bc934af86f08aa45f8a1a3de32fa4673 100644 (file)
@@ -9,7 +9,7 @@ import xml.etree.ElementTree
 
 from hashlib import sha1
 from math import pow, sqrt, floor
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
@@ -25,10 +25,9 @@ from ..aes import (
     aes_cbc_decrypt,
     inc,
 )
-from .common import InfoExtractor
 
 
-class CrunchyrollIE(SubtitlesInfoExtractor):
+class CrunchyrollIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -187,6 +186,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
         return output
 
+    def _get_subtitles(self, video_id, webpage):
+        subtitles = {}
+        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
+            sub_page = self._download_webpage(
+                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
+                video_id, note='Downloading subtitles for ' + sub_name)
+            id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
+            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
+            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
+            if not id or not iv or not data:
+                continue
+            id = int(id)
+            iv = base64.b64decode(iv)
+            data = base64.b64decode(data)
+
+            subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
+            if not lang_code:
+                continue
+            sub_root = xml.etree.ElementTree.fromstring(subtitle)
+            subtitles[lang_code] = [
+                {
+                    'ext': 'srt',
+                    'data': self._convert_subtitles_to_srt(sub_root),
+                },
+                {
+                    'ext': 'ass',
+                    'data': self._convert_subtitles_to_ass(sub_root),
+                },
+            ]
+        return subtitles
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('video_id')
@@ -249,34 +280,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 'format_id': video_format,
             })
 
-        subtitles = {}
-        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
-        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
-            sub_page = self._download_webpage(
-                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
-                video_id, note='Downloading subtitles for ' + sub_name)
-            id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
-            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
-            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
-            if not id or not iv or not data:
-                continue
-            id = int(id)
-            iv = base64.b64decode(iv)
-            data = base64.b64decode(data)
-
-            subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
-            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
-            if not lang_code:
-                continue
-            sub_root = xml.etree.ElementTree.fromstring(subtitle)
-            if sub_format == 'ass':
-                subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
-            else:
-                subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+        subtitles = self.extract_subtitles(video_id, webpage)
 
         return {
             'id': video_id,
index b2dbf4a927262a8e909f5d9963742126d795a0f9..42b20a46ddefc1e4a7e66aacd0d959a1e062618f 100644 (file)
@@ -6,7 +6,6 @@ import json
 import itertools
 
 from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
 
 from ..compat import (
     compat_str,
@@ -31,7 +30,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
         return request
 
 
-class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
+class DailymotionIE(DailymotionBaseInfoExtractor):
     """Information Extractor for Dailymotion"""
 
     _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
@@ -143,9 +142,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, webpage)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, webpage)
-            return
 
         view_count = str_to_int(self._search_regex(
             r'video_views_count[^>]+>\s+([\d\.,]+)',
@@ -169,7 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
             'view_count': view_count,
         }
 
-    def _get_available_subtitles(self, video_id, webpage):
+    def _get_subtitles(self, video_id, webpage):
         try:
             sub_list = self._download_webpage(
                 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
@@ -179,7 +175,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
             return {}
         info = json.loads(sub_list)
         if (info['total'] > 0):
-            sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
+            sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
             return sub_lang_list
         self._downloader.report_warning('video doesn\'t have subtitles')
         return {}
index d5df18d7c971c18f01c51128c75fbe4ee09ea070..8257e35a437b075461114fbaf1b4dd2d578f56d8 100644 (file)
@@ -1,11 +1,10 @@
 from __future__ import unicode_literals
 
-from .subtitles import SubtitlesInfoExtractor
-from .common import ExtractorError
+from .common import InfoExtractor, ExtractorError
 from ..utils import parse_iso8601
 
 
-class DRTVIE(SubtitlesInfoExtractor):
+class DRTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
 
     _TEST = {
@@ -76,7 +75,7 @@ class DRTVIE(SubtitlesInfoExtractor):
                     }
                     for subs in subtitles_list:
                         lang = subs['Language']
-                        subtitles[LANGS.get(lang, lang)] = subs['Uri']
+                        subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
 
         if not formats and restricted_to_denmark:
             raise ExtractorError(
@@ -84,10 +83,6 @@ class DRTVIE(SubtitlesInfoExtractor):
 
         self._sort_formats(formats)
 
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
         return {
             'id': video_id,
             'title': title,
@@ -96,5 +91,5 @@ class DRTVIE(SubtitlesInfoExtractor):
             'timestamp': timestamp,
             'duration': duration,
             'formats': formats,
-            'subtitles': self.extract_subtitles(video_id, subtitles),
+            'subtitles': subtitles,
         }
index 4de8d4bc5c9107ddc361a8351ea4a63d3da40783..e006921ec3f8d2a0aff0e6bb0595148469b1c256 100644 (file)
@@ -35,10 +35,7 @@ class EpornerIE(InfoExtractor):
         title = self._html_search_regex(
             r'<title>(.*?) - EPORNER', webpage, 'title')
 
-        redirect_code = self._html_search_regex(
-            r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
-            webpage, 'redirect_code')
-        redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
+        redirect_url = 'http://www.eporner.com/config5/%s' % video_id
         player_code = self._download_webpage(
             redirect_url, display_id, note='Downloading player config')
 
@@ -69,5 +66,5 @@ class EpornerIE(InfoExtractor):
             'duration': duration,
             'view_count': view_count,
             'formats': formats,
-            'age_limit': self._rta_search(webpage),
+            'age_limit': 18,
         }
index 6b693b3b6b6d7452e43c74298393e912aa17bcf1..b45c1dbd07650d9717408591c7b20077bf62475e 100644 (file)
@@ -31,10 +31,10 @@ class EscapistIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         uploader_id = self._html_search_regex(
-            r"<h1 class='headline'><a href='/videos/view/(.*?)'",
+            r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
             webpage, 'uploader ID', fatal=False)
         uploader = self._html_search_regex(
-            r"<h1 class='headline'>(.*?)</a>",
+            r"<h1\s+class='headline'>(.*?)</a>",
             webpage, 'uploader', fatal=False)
         description = self._html_search_meta('description', webpage)
 
@@ -42,9 +42,17 @@ class EscapistIE(InfoExtractor):
         title = raw_title.partition(' : ')[2]
 
         config_url = compat_urllib_parse.unquote(self._html_search_regex(
-            r'<param name="flashvars" value="config=([^"&]+)', webpage, 'config URL'))
+            r'''(?x)
+            (?:
+                <param\s+name="flashvars".*?\s+value="config=|
+                flashvars=&quot;config=
+            )
+            (https?://[^"&]+)
+            ''',
+            webpage, 'config URL'))
 
         formats = []
+        ad_formats = []
 
         def _add_format(name, cfgurl, quality):
             config = self._download_json(
@@ -54,14 +62,19 @@ class EscapistIE(InfoExtractor):
                 transform_source=js_to_json)
 
             playlist = config['playlist']
-            video_url = next(
-                p['url'] for p in playlist
-                if p.get('eventCategory') == 'Video')
-            formats.append({
-                'url': video_url,
-                'format_id': name,
-                'quality': quality,
-            })
+            for p in playlist:
+                if p.get('eventCategory') == 'Video':
+                    ar = formats
+                elif p.get('eventCategory') == 'Video Postroll':
+                    ar = ad_formats
+                else:
+                    continue
+
+                ar.append({
+                    'url': p['url'],
+                    'format_id': name,
+                    'quality': quality,
+                })
 
         _add_format('normal', config_url, quality=0)
         hq_url = (config_url +
@@ -70,10 +83,9 @@ class EscapistIE(InfoExtractor):
             _add_format('hq', hq_url, quality=1)
         except ExtractorError:
             pass  # That's fine, we'll just use normal quality
-
         self._sort_formats(formats)
 
-        return {
+        res = {
             'id': video_id,
             'formats': formats,
             'uploader': uploader,
@@ -82,3 +94,19 @@ class EscapistIE(InfoExtractor):
             'thumbnail': self._og_search_thumbnail(webpage),
             'description': description,
         }
+
+        if self._downloader.params.get('include_ads') and ad_formats:
+            self._sort_formats(ad_formats)
+            ad_res = {
+                'id': '%s-ad' % video_id,
+                'title': '%s (Postroll)' % title,
+                'formats': ad_formats,
+            }
+            return {
+                '_type': 'playlist',
+                'entries': [res, ad_res],
+                'title': title,
+                'id': video_id,
+            }
+
+        return res
index 1ad4e77a8a334dc0bfec62a0fb4752676e2e1435..f0e575320015d435889b1bd610b4871dbd84ae21 100644 (file)
@@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor):
         params_raw = compat_urllib_parse.unquote(data['params'])
         params = json.loads(params_raw)
         video_data = params['video_data'][0]
-        video_url = video_data.get('hd_src')
-        if not video_url:
-            video_url = video_data['sd_src']
-        if not video_url:
-            raise ExtractorError('Cannot find video URL')
+
+        formats = []
+        for quality in ['sd', 'hd']:
+            src = video_data.get('%s_src' % quality)
+            if src is not None:
+                formats.append({
+                    'format_id': quality,
+                    'url': src,
+                })
+        if not formats:
+            raise ExtractorError('Cannot find video formats')
 
         video_title = self._html_search_regex(
             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
@@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor):
         return {
             'id': video_id,
             'title': video_title,
-            'url': video_url,
+            'formats': formats,
             'duration': int_or_none(video_data.get('video_duration')),
             'thumbnail': video_data.get('thumbnail_src'),
         }
index 05f58f1afa5c06eefb98f6cc0d5bfc0ce1eb5a79..f7b467b0aff8f46aa028d1898f5909277e973318 100644 (file)
@@ -66,10 +66,10 @@ class GDCVaultIE(InfoExtractor):
 
     def _parse_flv(self, xml_description):
         video_formats = []
-        akami_url = xml_description.find('./metadata/akamaiHost').text
+        akamai_url = xml_description.find('./metadata/akamaiHost').text
         slide_video_path = xml_description.find('./metadata/slideVideo').text
         video_formats.append({
-            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st',
+            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
             'play_path': remove_end(slide_video_path, '.flv'),
             'ext': 'flv',
             'format_note': 'slide deck video',
@@ -79,7 +79,7 @@ class GDCVaultIE(InfoExtractor):
         })
         speaker_video_path = xml_description.find('./metadata/speakerVideo').text
         video_formats.append({
-            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st',
+            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
             'play_path': remove_end(speaker_video_path, '.flv'),
             'ext': 'flv',
             'format_note': 'speaker video',
index 875e1bf05ff274a41f46518c48e990954b7e12e5..3aff57e30302d3c33ce5e468f9df642cda0f6ff8 100644 (file)
@@ -1208,7 +1208,9 @@ class GenericIE(InfoExtractor):
             return entries[0]
         else:
             for num, e in enumerate(entries, start=1):
-                e['title'] = '%s (%d)' % (e['title'], num)
+                # 'url' results don't have a title
+                if e.get('title') is not None:
+                    e['title'] = '%s (%d)' % (e['title'], num)
             return {
                 '_type': 'playlist',
                 'entries': entries,
index 2fd3b4699d288809974ead7519e2bc5ddf9bcc68..e8ca49fd1564d8b3c2def7551645cd0179642f97 100644 (file)
@@ -1,23 +1,26 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
 import random
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    xpath_text,
+)
 
 
 class Laola1TvIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
     _TEST = {
-        'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html',
+        'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
         'info_dict': {
-            'id': '250019',
+            'id': '227883',
             'ext': 'mp4',
-            'title': 'Bitburger Open Grand Prix Gold - Court 1',
-            'categories': ['Badminton'],
-            'uploader': 'BWF - Badminton World Federation',
-            'is_live': True,
+            'title': 'Straubing Tigers - Kölner Haie',
+            'categories': ['Eishockey'],
+            'is_live': False,
         },
         'params': {
             'skip_download': True,
@@ -43,15 +46,26 @@ class Laola1TvIE(InfoExtractor):
             r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
         flashvars = dict((m[0], m[1]) for m in flashvars_m)
 
+        partner_id = self._search_regex(
+            r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id')
+
         xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
-                   'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % (
-                       video_id, portal, lang))
+                   'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % (
+                       video_id, partner_id, portal, lang))
         hd_doc = self._download_xml(xml_url, video_id)
 
-        title = hd_doc.find('.//video/title').text
-        flash_url = hd_doc.find('.//video/url').text
-        categories = hd_doc.find('.//video/meta_sports').text.split(',')
-        uploader = hd_doc.find('.//video/meta_organistation').text
+        title = xpath_text(hd_doc, './/video/title', fatal=True)
+        flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
+        uploader = xpath_text(hd_doc, './/video/meta_organistation')
+
+        is_live = xpath_text(hd_doc, './/video/islive') == 'true'
+        if is_live:
+            raise ExtractorError(
+                'Live streams are not supported by the f4m downloader.')
+
+        categories = xpath_text(hd_doc, './/video/meta_sports')
+        if categories:
+            categories = categories.split(',')
 
         ident = random.randint(10000000, 99999999)
         token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % (
@@ -60,15 +74,16 @@ class Laola1TvIE(InfoExtractor):
         token_doc = self._download_xml(
             token_url, video_id, note='Downloading token')
         token_attrib = token_doc.find('.//token').attrib
-        if token_attrib.get('auth') == 'blocked':
-            raise ExtractorError('Token error: ' % token_attrib.get('comment'))
+        if token_attrib.get('auth') in ('blocked', 'restricted'):
+            raise ExtractorError(
+                'Token error: %s' % token_attrib.get('comment'), expected=True)
 
         video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
             token_attrib['url'], token_attrib['auth'])
 
         return {
             'id': video_id,
-            'is_live': True,
+            'is_live': is_live,
             'title': title,
             'url': video_url,
             'uploader': uploader,
index 762cefa34ec35aa172102a5bbe9f78c129bdef92..109055e720a6908b748c08ddd70fd69026f192ab 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 import json
 
-from .subtitles import SubtitlesInfoExtractor
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
@@ -16,7 +15,7 @@ from ..utils import (
 )
 
 
-class LyndaIE(SubtitlesInfoExtractor):
+class LyndaIE(InfoExtractor):
     IE_NAME = 'lynda'
     IE_DESC = 'lynda.com videos'
     _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
@@ -88,11 +87,7 @@ class LyndaIE(SubtitlesInfoExtractor):
         self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, page)
-            return
-
-        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
+        subtitles = self.extract_subtitles(video_id, page)
 
         return {
             'id': video_id,
@@ -144,38 +139,31 @@ class LyndaIE(SubtitlesInfoExtractor):
         if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
             raise ExtractorError('Unable to log in')
 
-    def _fix_subtitles(self, subtitles):
-        if subtitles is None:
-            return subtitles  # subtitles not requested
-
-        fixed_subtitles = {}
-        for k, v in subtitles.items():
-            subs = json.loads(v)
-            if len(subs) == 0:
+    def _fix_subtitles(self, subs):
+        srt = ''
+        for pos in range(0, len(subs) - 1):
+            seq_current = subs[pos]
+            m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
+            if m_current is None:
                 continue
-            srt = ''
-            for pos in range(0, len(subs) - 1):
-                seq_current = subs[pos]
-                m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
-                if m_current is None:
-                    continue
-                seq_next = subs[pos + 1]
-                m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
-                if m_next is None:
-                    continue
-                appear_time = m_current.group('timecode')
-                disappear_time = m_next.group('timecode')
-                text = seq_current['Caption']
-                srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
-            if srt:
-                fixed_subtitles[k] = srt
-        return fixed_subtitles
-
-    def _get_available_subtitles(self, video_id, webpage):
+            seq_next = subs[pos + 1]
+            m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
+            if m_next is None:
+                continue
+            appear_time = m_current.group('timecode')
+            disappear_time = m_next.group('timecode')
+            text = seq_current['Caption']
+            srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
+        if srt:
+            return srt
+
+    def _get_subtitles(self, video_id, webpage):
         url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
-        sub = self._download_webpage(url, None, False)
-        sub_json = json.loads(sub)
-        return {'en': url} if len(sub_json) > 0 else {}
+        subs = self._download_json(url, None, False)
+        if subs:
+            return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
+        else:
+            return {}
 
 
 class LyndaCourseIE(InfoExtractor):
index 3c61a850f296c32861cdfd35095746c2cf1ef4ad..d7ab6a9aef23235d099175c7aff76ddd0ac0f84d 100644 (file)
@@ -5,9 +5,6 @@ import json
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
-from ..compat import (
-    compat_urlparse,
-)
 from ..utils import (
     clean_html,
     ExtractorError,
@@ -108,7 +105,6 @@ class OCWMITIE(InfoExtractor):
                 'upload_date': '20121109',
                 'uploader_id': 'MIT',
                 'uploader': 'MIT OpenCourseWare',
-                # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
             }
         },
         {
@@ -121,7 +117,6 @@ class OCWMITIE(InfoExtractor):
                 'uploader_id': 'MIT',
                 'uploader': 'MIT OpenCourseWare',
                 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
-                # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
             }
         }
     ]
@@ -140,7 +135,6 @@ class OCWMITIE(InfoExtractor):
             metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
             metadata = re.split(r', ?', metadata)
             yt = metadata[1]
-            subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
         else:
             # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
             embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
@@ -148,7 +142,6 @@ class OCWMITIE(InfoExtractor):
                 metadata = re.sub(r'[\'"]', '', embed_media.group(1))
                 metadata = re.split(r', ?', metadata)
                 yt = metadata[1]
-                subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
             else:
                 raise ExtractorError('Unable to find embedded YouTube video.')
         video_id = YoutubeIE.extract_id(yt)
@@ -159,7 +152,5 @@ class OCWMITIE(InfoExtractor):
             'title': title,
             'description': description,
             'url': yt,
-            'url_transparent'
-            'subtitles': subs,
             'ie_key': 'Youtube',
         }
index 2567583235617e52b6420419863dbc8d319c8201..d8897eb90d526b7b7d2e5a5ace5bec84ebb40031 100644 (file)
@@ -18,7 +18,7 @@ class MiTeleIE(InfoExtractor):
     IE_NAME = 'mitele.es'
     _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
         'md5': '6a75fe9d0d3275bead0cb683c616fddb',
         'info_dict': {
@@ -29,7 +29,7 @@ class MiTeleIE(InfoExtractor):
             'display_id': 'programa-144',
             'duration': 2913,
         },
-    }
+    }]
 
     def _real_extract(self, url):
         episode = self._match_id(url)
index bc7f49ebbac86cda7aa1bb711076b783e24bfea8..c11de1cb61b28d03ab2430ff1db3a82d317dc718 100644 (file)
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
@@ -23,7 +23,7 @@ def _media_xml_tag(tag):
     return '{http://search.yahoo.com/mrss/}%s' % tag
 
 
-class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
+class MTVServicesInfoExtractor(InfoExtractor):
     _MOBILE_TEMPLATE = None
 
     @staticmethod
@@ -95,25 +95,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
 
     def _extract_subtitles(self, mdoc, mtvn_id):
         subtitles = {}
-        FORMATS = {
-            'scc': 'cea-608',
-            'eia-608': 'cea-608',
-            'xml': 'ttml',
-        }
-        subtitles_format = FORMATS.get(
-            self._downloader.params.get('subtitlesformat'), 'ttml')
         for transcript in mdoc.findall('.//transcript'):
             if transcript.get('kind') != 'captions':
                 continue
             lang = transcript.get('srclang')
-            for typographic in transcript.findall('./typographic'):
-                captions_format = typographic.get('format')
-                if captions_format == subtitles_format:
-                    subtitles[lang] = compat_str(typographic.get('src'))
-                    break
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(mtvn_id, subtitles)
-        return self.extract_subtitles(mtvn_id, subtitles)
+            subtitles[lang] = [{
+                'url': compat_str(typographic.get('src')),
+                'ext': typographic.get('format')
+            } for typographic in transcript.findall('./typographic')]
+        return subtitles
 
     def _get_video_info(self, itemdoc):
         uri = itemdoc.find('guid').text
@@ -196,8 +186,6 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
                 webpage, 'mgid')
 
         videos_info = self._get_videos_info(mgid)
-        if self._downloader.params.get('listsubtitles', False):
-            return
         return videos_info
 
 
index c075618e84cb8181e27c2a9dc3cc033a16d5dea4..9c01eb0af8067948878581a0a30d9be326f990e9 100644 (file)
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-from .subtitles import SubtitlesInfoExtractor
 from .common import InfoExtractor
 from ..utils import (
     fix_xml_ampersands,
@@ -12,7 +11,7 @@ from ..utils import (
 )
 
 
-class NPOBaseIE(SubtitlesInfoExtractor):
+class NPOBaseIE(InfoExtractor):
     def _get_token(self, video_id):
         token_page = self._download_webpage(
             'http://ida.omroep.nl/npoplayer/i.js',
@@ -164,13 +163,10 @@ class NPOIE(NPOBaseIE):
 
         subtitles = {}
         if metadata.get('tt888') == 'ja':
-            subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
-        subtitles = self.extract_subtitles(video_id, subtitles)
+            subtitles['nl'] = [{
+                'ext': 'vtt',
+                'url': 'http://e.omroep.nl/tt888/%s' % video_id,
+            }]
 
         return {
             'id': video_id,
index f6de260222c678e2233b668d4b557e22e51d224c..1e4cfa2e7c8c5e3ae05c7d5fbc11242a334a5322 100644 (file)
@@ -4,13 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     float_or_none,
     parse_duration,
     unified_strdate,
 )
-from .subtitles import SubtitlesInfoExtractor
 
 
 class NRKIE(InfoExtractor):
@@ -73,7 +73,7 @@ class NRKIE(InfoExtractor):
         }
 
 
-class NRKTVIE(SubtitlesInfoExtractor):
+class NRKTVIE(InfoExtractor):
     _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
 
     _TESTS = [
@@ -156,10 +156,12 @@ class NRKTVIE(SubtitlesInfoExtractor):
         if self._downloader.params.get('verbose', False):
             self.to_screen('[debug] %s' % txt)
 
-    def _extract_captions(self, subtitlesurl, video_id, baseurl):
+    def _get_subtitles(self, subtitlesurl, video_id, baseurl):
         url = "%s%s" % (baseurl, subtitlesurl)
         self._debug_print('%s: Subtitle url: %s' % (video_id, url))
-        captions = self._download_xml(url, video_id, 'Downloading subtitles')
+        captions = self._download_xml(
+            url, video_id, 'Downloading subtitles',
+            transform_source=lambda s: s.replace(r'<br />', '\r\n'))
         lang = captions.get('lang', 'no')
         ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
         srt = ''
@@ -168,9 +170,11 @@ class NRKTVIE(SubtitlesInfoExtractor):
             duration = parse_duration(p.get('dur'))
             starttime = self._seconds2str(begin)
             endtime = self._seconds2str(begin + duration)
-            text = '\n'.join(p.itertext())
-            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
-        return {lang: srt}
+            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
+        return {lang: [
+            {'ext': 'ttml', 'url': url},
+            {'ext': 'srt', 'data': srt},
+        ]}
 
     def _extract_f4m(self, manifest_url, video_id):
         return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
@@ -243,10 +247,7 @@ class NRKTVIE(SubtitlesInfoExtractor):
             webpage, 'subtitle URL', default=None)
         subtitles = None
         if subtitles_url:
-            subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+            subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/r7.py b/youtube_dl/extractor/r7.py
new file mode 100644 (file)
index 0000000..976c8fe
--- /dev/null
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    unescapeHTML,
+    int_or_none,
+)
+
+
+class R7IE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://
+                        (?:
+                            (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
+                            noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
+                            player\.r7\.com/video/i/
+                        )
+                        (?P<id>[\da-f]{24})
+                        '''
+    _TESTS = [{
+        'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
+        'md5': '403c4e393617e8e8ddc748978ee8efde',
+        'info_dict': {
+            'id': '54e7050b0cf2ff57e0279389',
+            'ext': 'mp4',
+            'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 98,
+            'like_count': int,
+            'view_count': int,
+        },
+    }, {
+        'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/',
+        'only_matching': True,
+    }, {
+        'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://player.r7.com/video/i/%s' % video_id, video_id)
+
+        item = self._parse_json(js_to_json(self._search_regex(
+            r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id)
+
+        title = unescapeHTML(item['title'])
+        thumbnail = item.get('init', {}).get('thumbUri')
+        duration = None
+
+        statistics = item.get('statistics', {})
+        like_count = int_or_none(statistics.get('likes'))
+        view_count = int_or_none(statistics.get('views'))
+
+        formats = []
+        for format_key, format_dict in item['playlist'][0].items():
+            src = format_dict.get('src')
+            if not src:
+                continue
+            format_id = format_dict.get('format') or format_key
+            if duration is None:
+                duration = format_dict.get('duration')
+            if '.f4m' in src:
+                formats.extend(self._extract_f4m_formats(src, video_id, preference=-1))
+            elif src.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2))
+            else:
+                formats.append({
+                    'url': src,
+                    'format_id': format_id,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'like_count': like_count,
+            'view_count': view_count,
+            'formats': formats,
+        }
index aa26b7e0bb0f4f0a489ad4cfdef330c704747680..144e3398259179e396206d0ea059e334953dcfd7 100644 (file)
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
 )
@@ -12,7 +12,7 @@ from ..utils import (
 )
 
 
-class RaiIE(SubtitlesInfoExtractor):
+class RaiIE(InfoExtractor):
     _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
     _TESTS = [
         {
@@ -89,15 +89,7 @@ class RaiIE(SubtitlesInfoExtractor):
                 'ext': 'mp4',
             })
 
-        if self._downloader.params.get('listsubtitles', False):
-            page = self._download_webpage(url, video_id)
-            self._list_available_subtitles(video_id, page)
-            return
-
-        subtitles = {}
-        if self._have_to_download_any_subtitles:
-            page = self._download_webpage(url, video_id)
-            subtitles = self.extract_subtitles(video_id, page)
+        subtitles = self.extract_subtitles(video_id, url)
 
         return {
             'id': video_id,
@@ -111,7 +103,8 @@ class RaiIE(SubtitlesInfoExtractor):
             'subtitles': subtitles,
         }
 
-    def _get_available_subtitles(self, video_id, webpage):
+    def _get_subtitles(self, video_id, url):
+        webpage = self._download_webpage(url, video_id)
         subtitles = {}
         m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
         if m:
@@ -120,5 +113,8 @@ class RaiIE(SubtitlesInfoExtractor):
             SRT_EXT = '.srt'
             if captions.endswith(STL_EXT):
                 captions = captions[:-len(STL_EXT)] + SRT_EXT
-            subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
+            subtitles['it'] = [{
+                'ext': 'srt',
+                'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions),
+            }]
         return subtitles
index fd93cc66f5e1d377341cc016e67825b0f7f2782d..785a8045e09d65f31405fbd15106dbeb684afcc9 100644 (file)
@@ -146,7 +146,7 @@ class RTLnowIE(InfoExtractor):
                 mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
                 if mobj:
                     fmt = {
-                        'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'),
+                        'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
                         'play_path': 'mp4:' + mobj.group('play_path'),
                         'page_url': url,
                         'player_url': video_page_url + 'includes/vodplayer.swf',
index e60f85b5b4842d90b49aeec9aa87da8def92d4f9..c0fd23ff108f8b523a0e71f458147287954f429c 100644 (file)
@@ -102,14 +102,27 @@ class RTVEALaCartaIE(InfoExtractor):
             video_url = compat_urlparse.urljoin(
                 'http://mvod1.akcdn.rtve.es/', video_path)
 
+        subtitles = None
+        if info.get('sbtFile') is not None:
+            subtitles = self.extract_subtitles(video_id, info['sbtFile'])
+
         return {
             'id': video_id,
             'title': info['title'],
             'url': video_url,
             'thumbnail': info.get('image'),
             'page_url': url,
+            'subtitles': subtitles,
         }
 
+    def _get_subtitles(self, video_id, sub_file):
+        subs = self._download_json(
+            sub_file + '.json', video_id,
+            'Downloading subtitles info')['page']['items']
+        return dict(
+            (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+            for s in subs)
+
 
 class RTVELiveIE(InfoExtractor):
     IE_NAME = 'rtve.es:live'
index a4f8ce6c3c8cce1854e5695783908a7804af0cac..3a4ddf57ea369a0b250a4d786738e0ea4db9e1dd 100644 (file)
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 
 
 class SoundgasmIE(InfoExtractor):
+    IE_NAME = 'soundgasm'
     _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
     _TEST = {
         'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
@@ -38,3 +39,26 @@ class SoundgasmIE(InfoExtractor):
             'title': audio_title,
             'description': description
         }
+
+
+class SoundgasmProfileIE(InfoExtractor):
+    IE_NAME = 'soundgasm:profile'
+    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
+    _TEST = {
+        'url': 'http://soundgasm.net/u/ytdl',
+        'info_dict': {
+            'id': 'ytdl',
+        },
+        'playlist_count': 1,
+    }
+
+    def _real_extract(self, url):
+        profile_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, profile_id)
+
+        entries = [
+            self.url_result(audio_url, 'Soundgasm')
+            for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)]
+
+        return self.playlist_result(entries, profile_id)
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
deleted file mode 100644 (file)
index 59a5126..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-from __future__ import unicode_literals
-from .common import InfoExtractor
-
-from ..compat import compat_str
-from ..utils import (
-    ExtractorError,
-)
-
-
-class SubtitlesInfoExtractor(InfoExtractor):
-    @property
-    def _have_to_download_any_subtitles(self):
-        return any([self._downloader.params.get('writesubtitles', False),
-                    self._downloader.params.get('writeautomaticsub')])
-
-    def _list_available_subtitles(self, video_id, webpage):
-        """ outputs the available subtitles for the video """
-        sub_lang_list = self._get_available_subtitles(video_id, webpage)
-        auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
-        sub_lang = ",".join(list(sub_lang_list.keys()))
-        self.to_screen('%s: Available subtitles for video: %s' %
-                       (video_id, sub_lang))
-        auto_lang = ",".join(auto_captions_list.keys())
-        self.to_screen('%s: Available automatic captions for video: %s' %
-                       (video_id, auto_lang))
-
-    def extract_subtitles(self, video_id, webpage):
-        """
-        returns {sub_lang: sub} ,{} if subtitles not found or None if the
-        subtitles aren't requested.
-        """
-        if not self._have_to_download_any_subtitles:
-            return None
-        available_subs_list = {}
-        if self._downloader.params.get('writeautomaticsub', False):
-            available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
-        if self._downloader.params.get('writesubtitles', False):
-            available_subs_list.update(self._get_available_subtitles(video_id, webpage))
-
-        if not available_subs_list:  # error, it didn't get the available subtitles
-            return {}
-        if self._downloader.params.get('allsubtitles', False):
-            sub_lang_list = available_subs_list
-        else:
-            if self._downloader.params.get('subtitleslangs', False):
-                requested_langs = self._downloader.params.get('subtitleslangs')
-            elif 'en' in available_subs_list:
-                requested_langs = ['en']
-            else:
-                requested_langs = [list(available_subs_list.keys())[0]]
-
-            sub_lang_list = {}
-            for sub_lang in requested_langs:
-                if sub_lang not in available_subs_list:
-                    self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
-                    continue
-                sub_lang_list[sub_lang] = available_subs_list[sub_lang]
-
-        subtitles = {}
-        for sub_lang, url in sub_lang_list.items():
-            subtitle = self._request_subtitle_url(sub_lang, url)
-            if subtitle:
-                subtitles[sub_lang] = subtitle
-        return subtitles
-
-    def _download_subtitle_url(self, sub_lang, url):
-        return self._download_webpage(url, None, note=False)
-
-    def _request_subtitle_url(self, sub_lang, url):
-        """ makes the http request for the subtitle """
-        try:
-            sub = self._download_subtitle_url(sub_lang, url)
-        except ExtractorError as err:
-            self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
-            return
-        if not sub:
-            self._downloader.report_warning('Did not fetch video subtitles')
-            return
-        return sub
-
-    def _get_available_subtitles(self, video_id, webpage):
-        """
-        returns {sub_lang: url} or {} if not available
-        Must be redefined by the subclasses
-        """
-
-        # By default, allow implementations to simply pass in the result
-        assert isinstance(webpage, dict), \
-            '_get_available_subtitles not implemented'
-        return webpage
-
-    def _get_available_automatic_caption(self, video_id, webpage):
-        """
-        returns {sub_lang: url} or {} if not available
-        Must be redefined by the subclasses that support automatic captions,
-        otherwise it will return {}
-        """
-        self._downloader.report_warning('Automatic Captions not supported by this server')
-        return {}
index 59678399d5c5ba17894e557830e4c1d3e3236fab..4cec06f8bd6e2a18ac3062e916225746f5153c93 100644 (file)
@@ -3,14 +3,14 @@ from __future__ import unicode_literals
 import json
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 
 from ..compat import (
     compat_str,
 )
 
 
-class TEDIE(SubtitlesInfoExtractor):
+class TEDIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         (?P<proto>https?://)
         (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
@@ -184,11 +184,6 @@ class TEDIE(SubtitlesInfoExtractor):
         self._sort_formats(formats)
 
         video_id = compat_str(talk_info['id'])
-        # subtitles
-        video_subtitles = self.extract_subtitles(video_id, talk_info)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, talk_info)
-            return
 
         thumbnail = talk_info['thumb']
         if not thumbnail.startswith('http'):
@@ -199,21 +194,25 @@ class TEDIE(SubtitlesInfoExtractor):
             'uploader': talk_info['speaker'],
             'thumbnail': thumbnail,
             'description': self._og_search_description(webpage),
-            'subtitles': video_subtitles,
+            'subtitles': self._get_subtitles(video_id, talk_info),
             'formats': formats,
             'duration': talk_info.get('duration'),
         }
 
-    def _get_available_subtitles(self, video_id, talk_info):
+    def _get_subtitles(self, video_id, talk_info):
         languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
         if languages:
             sub_lang_list = {}
             for l in languages:
-                url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
-                sub_lang_list[l] = url
+                sub_lang_list[l] = [
+                    {
+                        'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
+                        'ext': ext,
+                    }
+                    for ext in ['ted', 'srt']
+                ]
             return sub_lang_list
         else:
-            self._downloader.report_warning('video doesn\'t have subtitles')
             return {}
 
     def _watch_info(self, url, name):
index be3f72df7c11043346b015528ae905913a3d05df..251a686804b6f26915c3fa25d9f6b2cc1f98ed4b 100644 (file)
@@ -6,9 +6,9 @@ from .mitele import MiTeleIE
 
 class TelecincoIE(MiTeleIE):
     IE_NAME = 'telecinco.es'
-    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
+    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
         'info_dict': {
             'id': 'MDSVID20141015_0058',
@@ -16,4 +16,7 @@ class TelecincoIE(MiTeleIE):
             'title': 'Con Martín Berasategui, hacer un bacalao al ...',
             'duration': 662,
         },
-    }
+    }, {
+        'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
+        'only_matching': True,
+    }]
index f7b34bd265daf8204d5c6e00de872e6607d778a7..feac666f78baff49f4fb312a147acad67d320bc2 100644 (file)
@@ -8,7 +8,7 @@ import binascii
 import hashlib
 
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_str,
 )
@@ -22,7 +22,7 @@ from ..utils import (
 _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
 
 
-class ThePlatformIE(SubtitlesInfoExtractor):
+class ThePlatformIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
@@ -106,15 +106,11 @@ class ThePlatformIE(SubtitlesInfoExtractor):
         captions = info.get('captions')
         if isinstance(captions, list):
             for caption in captions:
-                lang, src = caption.get('lang'), caption.get('src')
-                if lang and src:
-                    subtitles[lang] = src
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
-        subtitles = self.extract_subtitles(video_id, subtitles)
+                lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
+                subtitles[lang] = [{
+                    'ext': 'srt' if mime == 'text/srt' else 'ttml',
+                    'url': src,
+                }]
 
         head = meta.find(_x('smil:head'))
         body = meta.find(_x('smil:body'))
index 944901e1482a666ae90cc5e1c0f86e325ec2aecc..6816dacb665e2253a132cfe678999a1129860a0b 100644 (file)
@@ -2,16 +2,17 @@ from __future__ import unicode_literals
 
 import re
 
+from ..compat import compat_urlparse
 from ..utils import (
     ExtractorError,
     unescapeHTML,
     unified_strdate,
     US_RATINGS,
 )
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 
 
-class VikiIE(SubtitlesInfoExtractor):
+class VikiIE(InfoExtractor):
     IE_NAME = 'viki'
 
     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
@@ -69,9 +70,6 @@ class VikiIE(SubtitlesInfoExtractor):
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, info_webpage)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, info_webpage)
-            return
 
         return {
             'id': video_id,
@@ -85,12 +83,15 @@ class VikiIE(SubtitlesInfoExtractor):
             'upload_date': upload_date,
         }
 
-    def _get_available_subtitles(self, video_id, info_webpage):
+    def _get_subtitles(self, video_id, info_webpage):
         res = {}
-        for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
+        for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
             sturl = unescapeHTML(sturl_html)
             m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
             if not m:
                 continue
-            res[m.group('lang')] = sturl
+            res[m.group('lang')] = [{
+                'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
+                'ext': 'vtt',
+            }]
         return res
index 4cd2f73d9962529db31e259011db9ccbfe053ac4..8f540f5780570d06fa10e695555026c537b7c0f0 100644 (file)
@@ -7,7 +7,6 @@ import itertools
 import hashlib
 
 from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
 from ..compat import (
     compat_HTTPError,
     compat_urllib_parse,
@@ -53,7 +52,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         self._download_webpage(login_request, None, False, 'Wrong login info')
 
 
-class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
+class VimeoIE(VimeoBaseInfoExtractor):
     """Information extractor for vimeo.com."""
 
     # _VALID_URL matches Vimeo URLs
@@ -378,12 +377,10 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
         text_tracks = config['request'].get('text_tracks')
         if text_tracks:
             for tt in text_tracks:
-                subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
-
-        video_subtitles = self.extract_subtitles(video_id, subtitles)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+                subtitles[tt['lang']] = [{
+                    'ext': 'vtt',
+                    'url': 'http://vimeo.com' + tt['url'],
+                }]
 
         return {
             'id': video_id,
@@ -399,7 +396,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
             'view_count': view_count,
             'like_count': like_count,
             'comment_count': comment_count,
-            'subtitles': video_subtitles,
+            'subtitles': subtitles,
         }
 
 
index 672bda7a7aea761b28f2d2d48be2fe2a9dbd054e..24efbd6e6341ba5aa73e5df11cb9af36f941da43 100644 (file)
@@ -3,14 +3,14 @@ from __future__ import unicode_literals
 
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..utils import (
     xpath_text,
     int_or_none,
 )
 
 
-class WallaIE(SubtitlesInfoExtractor):
+class WallaIE(InfoExtractor):
     _VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
     _TEST = {
         'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
@@ -52,13 +52,10 @@ class WallaIE(SubtitlesInfoExtractor):
         subtitles = {}
         for subtitle in item.findall('./subtitles/subtitle'):
             lang = xpath_text(subtitle, './title')
-            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = xpath_text(subtitle, './src')
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
-        subtitles = self.extract_subtitles(video_id, subtitles)
+            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
+                'ext': 'srt',
+                'url': xpath_text(subtitle, './src'),
+            }]
 
         formats = []
         for quality in item.findall('./qualities/quality'):
index 3d3d43491c293c79334ec2c1a8a42fec93063796..3690f8021267b30171be4f2e7a019133aaeaaca9 100644 (file)
@@ -11,7 +11,6 @@ import time
 import traceback
 
 from .common import InfoExtractor, SearchInfoExtractor
-from .subtitles import SubtitlesInfoExtractor
 from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
 from ..compat import (
@@ -185,7 +184,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             return
 
 
-class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
+class YoutubeIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com'
     _VALID_URL = r"""(?x)^
                      (
@@ -648,7 +647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             raise ExtractorError(
                 'Signature extraction failed: ' + tb, cause=e)
 
-    def _get_available_subtitles(self, video_id, webpage):
+    def _get_subtitles(self, video_id, webpage):
         try:
             subs_doc = self._download_xml(
                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
@@ -662,23 +661,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             lang = track.attrib['lang_code']
             if lang in sub_lang_list:
                 continue
-            params = compat_urllib_parse.urlencode({
-                'lang': lang,
-                'v': video_id,
-                'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
-                'name': track.attrib['name'].encode('utf-8'),
-            })
-            url = 'https://www.youtube.com/api/timedtext?' + params
-            sub_lang_list[lang] = url
+            sub_formats = []
+            for ext in ['sbv', 'vtt', 'srt']:
+                params = compat_urllib_parse.urlencode({
+                    'lang': lang,
+                    'v': video_id,
+                    'fmt': ext,
+                    'name': track.attrib['name'].encode('utf-8'),
+                })
+                sub_formats.append({
+                    'url': 'https://www.youtube.com/api/timedtext?' + params,
+                    'ext': ext,
+                })
+            sub_lang_list[lang] = sub_formats
         if not sub_lang_list:
             self._downloader.report_warning('video doesn\'t have subtitles')
             return {}
         return sub_lang_list
 
-    def _get_available_automatic_caption(self, video_id, webpage):
+    def _get_automatic_captions(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
-        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
         self.to_screen('%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
@@ -708,14 +711,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             sub_lang_list = {}
             for lang_node in caption_list.findall('target'):
                 sub_lang = lang_node.attrib['lang_code']
-                params = compat_urllib_parse.urlencode({
-                    'lang': original_lang,
-                    'tlang': sub_lang,
-                    'fmt': sub_format,
-                    'ts': timestamp,
-                    'kind': caption_kind,
-                })
-                sub_lang_list[sub_lang] = caption_url + '&' + params
+                sub_formats = []
+                for ext in ['sbv', 'vtt', 'srt']:
+                    params = compat_urllib_parse.urlencode({
+                        'lang': original_lang,
+                        'tlang': sub_lang,
+                        'fmt': ext,
+                        'ts': timestamp,
+                        'kind': caption_kind,
+                    })
+                    sub_formats.append({
+                        'url': caption_url + '&' + params,
+                        'ext': ext,
+                    })
+                sub_lang_list[sub_lang] = sub_formats
             return sub_lang_list
         # An extractor error can be raised by the download process if there are
         # no automatic captions but there are subtitles
@@ -970,10 +979,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, video_webpage)
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, video_webpage)
-            return
+        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
 
         if 'length_seconds' not in video_info:
             self._downloader.report_warning('unable to extract video duration')
@@ -1122,6 +1128,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             'description': video_description,
             'categories': video_categories,
             'subtitles': video_subtitles,
+            'automatic_captions': automatic_captions,
             'duration': video_duration,
             'age_limit': 18 if age_gate else 0,
             'annotations': video_annotations,
@@ -1146,13 +1153,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                         |  p/
                         )
                         (
-                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                             # Top tracks, they can also include dots
                             |(?:MC)[\w\.]*
                         )
                         .*
                      |
-                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
+                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                      )"""
     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
@@ -1237,7 +1244,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
             for vid_id in ids]
 
     def _extract_mix(self, playlist_id):
-        # The mixes are generated from a single video
+        # The mixes are generated from a single video
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
         webpage = self._download_webpage(
@@ -1273,7 +1280,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
             else:
                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
-        if playlist_id.startswith('RD'):
+        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
             # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
 
index 5f678f76b9561bf374693d29aa6e4c92406e08a2..886ce96132d4ec6b67b3bf3114425463f6687597 100644 (file)
@@ -272,6 +272,10 @@ def parseOpts(overrideArguments=None):
         '--no-playlist',
         action='store_true', dest='noplaylist', default=False,
         help='If the URL refers to a video and a playlist, download only the video.')
+    selection.add_option(
+        '--yes-playlist',
+        action='store_false', dest='noplaylist', default=False,
+        help='If the URL refers to a video and a playlist, download the playlist.')
     selection.add_option(
         '--age-limit',
         metavar='YEARS', dest='age_limit', default=None, type=int,
@@ -387,8 +391,8 @@ def parseOpts(overrideArguments=None):
         help='lists all available subtitles for the video')
     subtitles.add_option(
         '--sub-format',
-        action='store', dest='subtitlesformat', metavar='FORMAT', default='srt',
-        help='subtitle format (default=srt) ([sbv/vtt] youtube only)')
+        action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
+        help='subtitle format, accepts formats preference, for example: "ass/srt/best"')
     subtitles.add_option(
         '--sub-lang', '--sub-langs', '--srt-lang',
         action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
index 3f2e6cf1d38af3c6896cce5c92560f0a0a53d554..398fe050ede3d7da8678fd1453bc1ae475419362 100644 (file)
@@ -496,10 +496,6 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         'zu': 'zul',
     }
 
-    def __init__(self, downloader=None, subtitlesformat='srt'):
-        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
-        self._subformat = subtitlesformat
-
     @classmethod
     def _conver_lang_code(cls, code):
         """Convert language code from ISO 639-1 to ISO 639-2/T"""
@@ -509,13 +505,14 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         if information['ext'] != 'mp4':
             self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
             return True, information
-        if not information.get('subtitles'):
+        subtitles = information.get('requested_subtitles')
+        if not subtitles:
             self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
             return True, information
 
-        sub_langs = [key for key in information['subtitles']]
+        sub_langs = list(subtitles.keys())
         filename = information['filepath']
-        input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
+        input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
 
         opts = [
             '-map', '0',
index 475fad3c903f9a2923def9f186c746d067807a68..506c896de377da7115e3d7139742ee9eab581d56 100644 (file)
@@ -54,7 +54,7 @@ from .compat import (
 compiled_regex_type = type(re.compile(''))
 
 std_headers = {
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     'Accept-Encoding': 'gzip, deflate',
@@ -304,6 +304,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
         # Common case of "Foreign band name - English song title"
         if restricted and result.startswith('-_'):
             result = result[2:]
+        if result.startswith('-'):
+            result = '_' + result[len('-'):]
         if not result:
             result = '_'
     return result
index 7c8b29c3b852436732a4f5a430fd0dae73fa8fa6..0cbf66ed1e12c1e9f5d801932957e2db85590374 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.02.21'
+__version__ = '2015.02.26'