Merge pull request #826 from jakeogh/master

[youtube-dl] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index f4e7065d1c7264c25e37224927ea3012579b9fe7..ec52cbcff433ea0d75cd02a9b7343f83af07e99b 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -610,10 +610,13 @@ class YoutubeIE(InfoExtractor):
              self.report_rtmp_download()
              video_url_list = [(None, video_info['conn'][0])]
          elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
-            url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
-            url_data = [compat_parse_qs(uds) for uds in url_data_strs]
-            url_data = [ud for ud in url_data if 'itag' in ud and 'url' in ud]
-            url_map = dict((ud['itag'][0], ud['url'][0] + '&signature=' + ud['sig'][0]) for ud in url_data)
+            url_map = {}
+            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
+                url_data = compat_parse_qs(url_data_str)
+                if 'itag' in url_data and 'url' in url_data:
+                    url = url_data['url'][0] + '&signature=' + url_data['sig'][0]
+                    if not 'ratebypass' in url: url += '&ratebypass=yes'
+                    url_map[url_data['itag'][0]] = url
  
              format_limit = self._downloader.params.get('format_limit', None)
              available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
@@ -1465,9 +1468,9 @@ class GoogleSearchIE(InfoExtractor):
          prefix = mobj.group('prefix')
          query = mobj.group('query')
          if prefix == '':
-            return self._download_n_results(query, 1)
+            return self._get_n_results(query, 1)
          elif prefix == 'all':
-            return self._download_n_results(query, self._max_google_results)
+            return self._get_n_results(query, self._max_google_results)
          else:
              n = int(prefix)
              if n <= 0:
@@ -1475,10 +1478,10 @@ class GoogleSearchIE(InfoExtractor):
              elif n > self._max_google_results:
                  self._downloader.report_warning(u'gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
                  n = self._max_google_results
-            return self._download_n_results(query, n)
+            return self._get_n_results(query, n)
  
-    def _download_n_results(self, query, n):
-        """Downloads a specified number of results for a query"""
+    def _get_n_results(self, query, n):
+        """Get a specified number of results for a query"""
  
          res = {
              '_type': 'playlist',
@@ -4124,7 +4127,35 @@ class RedTubeIE(InfoExtractor):
              'ext':      video_extension,
              'title':    video_title,
          }]
+        
+class InaIE(InfoExtractor):
+    """Information Extractor for Ina.fr (reads the video's MRSS notice from player.ina.fr)"""
+    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
+
+    def _real_extract(self, url):
+        """Return a one-element list with the info dict (id, url, ext, title) for the video at `url`."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
+        video_extension = 'mp4'
+        webpage = self._download_webpage(mrss_url, video_id)
+        # Media URL comes from the <media:player> element; dots are escaped so only the literal host matches.
+        mobj = re.search(r'<media:player url="(?P<mp4url>http://mp4\.ina\.fr/[^"]+\.mp4)', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract media URL')
+        video_url = mobj.group('mp4url')
+        # The title is wrapped in a CDATA section inside <title>.
+        mobj = re.search(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract title')
+        video_title = mobj.group('titre')
  
+        return [{
+            'id':       video_id,
+            'url':      video_url,
+            'ext':      video_extension,
+            'title':    video_title,
+        }]
  
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
@@ -4182,6 +4213,7 @@ def gen_extractors():
          TumblrIE(),
          BandcampIE(),
          RedTubeIE(),
+        InaIE(),
          GenericIE()
      ]