Merge remote-tracking branch 'dstftw/rutube-channel'

author Philipp Hagemeister <phihag@phihag.de>
Tue, 28 Jan 2014 02:32:22 +0000 (03:32 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Tue, 28 Jan 2014 02:32:22 +0000 (03:32 +0100)
diff --git a/test/test_playlists.py b/test/test_playlists.py

index 3861224b02bc15332214c767e8708048c5bad737..b3ce6f71ef3bdb17a9e700729984bbfd840d30ef 100644 (file)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -196,11 +196,11 @@ class TestPlaylists(unittest.TestCase):
      def test_imdb_list(self):
          dl = FakeYDL()
          ie = ImdbListIE(dl)
-        result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U')
+        result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0')
          self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'sMjedvGDd8U')
-        self.assertEqual(result['title'], 'Animated and Family Films')
-        self.assertTrue(len(result['entries']) >= 48)
+        self.assertEqual(result['id'], 'JFs9NWw6XI0')
+        self.assertEqual(result['title'], 'March 23, 2012 Releases')
+        self.assertEqual(len(result['entries']), 7)
  
      def test_khanacademy_topic(self):
          dl = FakeYDL()
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py

index 443294e6f0550f67ca1802a52d012ca9b7ecfecf..717e151d93b6d0717c65cddf91c63e84da2dc0ae 100644 (file)
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -23,7 +23,6 @@ from ..utils import (
  class BrightcoveIE(InfoExtractor):
      _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
      _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
-    _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
  
      _TESTS = [
          {
@@ -71,6 +70,17 @@ class BrightcoveIE(InfoExtractor):
                  'uploader': 'National Ballet of Canada',
              },
          },
+        {
+            # https://github.com/rg3/youtube-dl/issues/2253
+            'url': 'http://v.thestar.com/services/player/bcpid2071349530001?bckey=AQ~~,AAAAuO4KaJE~,gatFNwSKdGDmDpIYqNJ-fTHn_c4z_LH_&bctid=3101154703001',
+            'file': '3101154703001.mp4',
+            'md5': '0ba9446db037002366bab3b3eb30c88c',
+            'info_dict': {
+                'title': 'Still no power',
+                'uploader': 'thestar.com',
+                'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
+            }
+        }
      ]
  
      @classmethod
@@ -131,6 +141,11 @@ class BrightcoveIE(InfoExtractor):
          """Try to extract the brightcove url from the wepbage, returns None
          if it can't be found
          """
+
+        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
+        if url_m:
+            return url_m.group(1)
+
          m_brightcove = re.search(
              r'''(?sx)<object
              (?:
@@ -183,8 +198,9 @@ class BrightcoveIE(InfoExtractor):
          return self._extract_video_info(video_info)
  
      def _get_playlist_info(self, player_key):
-        playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
-                                               player_key, 'Downloading playlist information')
+        info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
+        playlist_info = self._download_webpage(
+            info_url, player_key, 'Downloading playlist information')
  
          json_data = json.loads(playlist_info)
          if 'videoList' not in json_data:
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py

index 1763af020ef418e0a05b77ef854006dd519cf989..7cee505c085cd1601e0b8ce3ab689795b4f94dfd 100644 (file)
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -69,12 +69,9 @@ class ImdbListIE(InfoExtractor):
          list_id = mobj.group('id')
  
          webpage = self._download_webpage(url, list_id)
-        list_code = self._search_regex(
-            r'(?s)<div\s+class="list\sdetail">(.*?)class="see-more"',
-            webpage, 'list code')
          entries = [
              self.url_result('http://www.imdb.com' + m, 'Imdb')
-            for m in re.findall(r'href="(/video/imdb/vi[^"]+)"', webpage)]
+            for m in re.findall(r'href="(/video/imdb/vi[^"]+)"\s+data-type="playlist"', webpage)]
  
          list_title = self._html_search_regex(
              r'<h1 class="header">(.*?)</h1>', webpage, 'list title')
diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py

index 6d61f9a90c54534cabc3b09c74f63f6a7c1c707f..db2028e9f5148d37f69f0d7c4c41fbbb77d88928 100644 (file)
--- a/youtube_dl/extractor/la7.py
+++ b/youtube_dl/extractor/la7.py
@@ -26,7 +26,8 @@ class LA7IE(InfoExtractor):
              'title': 'IL DIVO',
              'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci',
              'duration': 6254,
-        }
+        },
+        'skip': 'Blocked in the US',
      }
  
      def _real_extract(self, url):
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py

index 83459c52251110c9adfa3c5cb1cfcc577f3c7354..4922dd764eb119a4993bad607a3a01afccf99f51 100644 (file)
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -15,7 +15,7 @@ from ..utils import (
  
  class RutubeIE(InfoExtractor):
      IE_NAME = 'rutube'
-    IE_DESC = 'Rutube videos'    
+    IE_DESC = 'Rutube videos'
      _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'
  
      _TEST = {
@@ -71,7 +71,7 @@ class RutubeIE(InfoExtractor):
  
  class RutubeChannelIE(InfoExtractor):
      IE_NAME = 'rutube:channel'
-    IE_DESC = 'Rutube channels'    
+    IE_DESC = 'Rutube channels'
      _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
  
      _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
@@ -79,15 +79,16 @@ class RutubeChannelIE(InfoExtractor):
      def _extract_videos(self, channel_id, channel_title=None):
          entries = []
          for pagenum in itertools.count(1):
-            api_response = self._download_webpage(self._PAGE_TEMPLATE % (channel_id, pagenum),
-                                                   channel_id, 'Downloading page %s' % pagenum)
+            api_response = self._download_webpage(
+                self._PAGE_TEMPLATE % (channel_id, pagenum),
+                channel_id, 'Downloading page %s' % pagenum)
              page = json.loads(api_response)
              results = page['results']
-            if len(results) == 0:
-                break;
+            if not results:
+                break
              entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
-            if page['has_next'] is False:
-                break;
+            if not page['has_next']:
+                break
          return self.playlist_result(entries, channel_id, channel_title)
  
      def _real_extract(self, url):
@@ -98,7 +99,7 @@ class RutubeChannelIE(InfoExtractor):
  
  class RutubeMovieIE(RutubeChannelIE):
      IE_NAME = 'rutube:movie'
-    IE_DESC = 'Rutube movies'    
+    IE_DESC = 'Rutube movies'
      _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
  
      _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
@@ -107,8 +108,9 @@ class RutubeMovieIE(RutubeChannelIE):
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          movie_id = mobj.group('id')
-        api_response = self._download_webpage(self._MOVIE_TEMPLATE % movie_id, movie_id,
-                                            'Downloading movie JSON')
+        api_response = self._download_webpage(
+            self._MOVIE_TEMPLATE % movie_id, movie_id,
+            'Downloading movie JSON')
          movie = json.loads(api_response)
          movie_name = movie['name']
          return self._extract_videos(movie_id, movie_name)
@@ -119,4 +121,4 @@ class RutubePersonIE(RutubeChannelIE):
      IE_DESC = 'Rutube person videos'
      _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
  
-    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
-\ No newline at end of file
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
author	Philipp Hagemeister <phihag@phihag.de>
	Tue, 28 Jan 2014 02:32:22 +0000 (03:32 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Tue, 28 Jan 2014 02:32:22 +0000 (03:32 +0100)
test/test_playlists.py		patch \| blob \| history
youtube_dl/extractor/brightcove.py		patch \| blob \| history
youtube_dl/extractor/imdb.py		patch \| blob \| history
youtube_dl/extractor/la7.py		patch \| blob \| history
youtube_dl/extractor/rutube.py		patch \| blob \| history