Merge pull request #6372 from raphaelm/patch-1

author Sergey M. <dstftw@gmail.com>
Tue, 28 Jul 2015 20:46:53 +0000 (02:46 +0600)
committer Sergey M. <dstftw@gmail.com>
Tue, 28 Jul 2015 20:46:53 +0000 (02:46 +0600)
diff --git a/docs/supportedsites.md b/docs/supportedsites.md

index 73445137f3f165de4d98b6f1c367553f5ce9345a..657935dc6597b00dab2c45e37380a4c52968f139 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -51,6 +51,7 @@
   - **bambuser:channel**
   - **Bandcamp**
   - **Bandcamp:album**
+ - **bbc**: BBC
   - **bbc.co.uk**: BBC iPlayer
   - **BeatportPro**
   - **Beeg**
@@ -224,6 +225,7 @@
   - **InternetVideoArchive**
   - **IPrima**
   - **iqiyi**: 爱奇艺
+ - **Ir90Tv**
   - **ivi**: ivi.ru
   - **ivi:compilation**: ivi.ru compilations
   - **Izlesene**
@@ -252,6 +254,7 @@
   - **kuwo:song**: 酷我音乐
   - **la7.tv**
   - **Laola1Tv**
+ - **Lecture2Go**
   - **Letv**: 乐视网
   - **LetvPlaylist**
   - **LetvTv**
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py

index 66e52641bb8a672c97184fa046f7ef9a0c0adf99..01d07c9c0c3e9dedb41ba560e7aecae5ff26c2e5 100644 (file)
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -450,6 +450,14 @@ class BBCIE(BBCCoUkIE):
          },
          'playlist_count': 9,
          'skip': 'Save time',
+    }, {
+        # article with multiple videos embedded with `new SMP()`
+        'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
+        'info_dict': {
+            'id': '3662a707-0af9-3149-963f-47bea720b460',
+            'title': 'BBC Blogs - Adam Curtis - BUGGER',
+        },
+        'playlist_count': 18,
      }, {
          # single video embedded with mediaAssetPage.init()
          'url': 'http://www.bbc.com/news/world-europe-32041533',
@@ -637,12 +645,30 @@ class BBCIE(BBCCoUkIE):
  
          playlist_title = self._html_search_regex(
              r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
-        playlist_description = self._og_search_description(webpage)
+        playlist_description = self._og_search_description(webpage, default=None)
+
+        def extract_all(pattern):
+            return list(filter(None, map(
+                lambda s: self._parse_json(s, playlist_id, fatal=False),
+                re.findall(pattern, webpage))))
+
+        # Multiple video article (e.g.
+        # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
+        EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+[\da-z]{8}(?:\b[^"]+)?'
+        entries = []
+        for match in extract_all(r'new\s+SMP\(({.+?})\)'):
+            embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
+            if embed_url and re.match(EMBED_URL, embed_url):
+                entries.append(embed_url)
+        entries.extend(re.findall(
+            r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
+        if entries:
+            return self.playlist_result(
+                [self.url_result(entry, 'BBCCoUk') for entry in entries],
+                playlist_id, playlist_title, playlist_description)
  
          # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
-        medias = list(filter(None, map(
-            lambda s: self._parse_json(s, playlist_id, fatal=False),
-            re.findall(r"data-media-meta='({[^']+})'", webpage))))
+        medias = extract_all(r"data-media-meta='({[^']+})'")
  
          if not medias:
              # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 6d2efb22e784ecd40dcdebe5195a0d8dde63d632..8cef61c3c9a235a6d3f3230b8517222334dc0fbc 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -276,14 +276,6 @@ class GenericIE(InfoExtractor):
                  'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
              },
          },
-        # BBC iPlayer embeds
-        {
-            'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
-            'info_dict': {
-                'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
-            },
-            'playlist_mincount': 18,
-        },
          # RUTV embed
          {
              'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index 280afdd7f8f21cc058768eba232a11672ca32761..fa157cadb232c7238206f568986f055d2960073f 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
  from __future__ import unicode_literals
  
-__version__ = '2015.07.21'
+__version__ = '2015.07.28'
author	Sergey M. <dstftw@gmail.com>
	Tue, 28 Jul 2015 20:46:53 +0000 (02:46 +0600)
committer	Sergey M. <dstftw@gmail.com>
	Tue, 28 Jul 2015 20:46:53 +0000 (02:46 +0600)
docs/supportedsites.md		patch | blob | history
youtube_dl/extractor/bbc.py		patch | blob | history
youtube_dl/extractor/generic.py		patch | blob | history
youtube_dl/version.py		patch | blob | history