Re-worked the release workflow: it is now a single step and creates GPG signatures

[youtube-dl] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 1443c188f2e21671be59f87a84ba8f0aba7b0c0d..7bea26142fc7a7f09c28037a5635de4a70cb46ce 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -3,6 +3,7 @@
  
  from __future__ import absolute_import
  
+import base64
  import datetime
  import netrc
  import os
@@ -105,12 +106,12 @@ class InfoExtractor(object):
      def IE_NAME(self):
          return type(self).__name__[:-2]
  
-    def _download_webpage(self, url, video_id, note=None, errnote=None):
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
          if note is None:
              note = u'Downloading video webpage'
          self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
          try:
-            urlh = compat_urllib_request.urlopen(url)
+            urlh = compat_urllib_request.urlopen(url_or_request)
              webpage_bytes = urlh.read()
              return webpage_bytes.decode('utf-8', 'replace')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -411,7 +412,7 @@ class YoutubeIE(InfoExtractor):
  
          # uploader_id
          video_uploader_id = None
-        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/user/([^"]+)">', video_webpage)
+        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
          if mobj is not None:
              video_uploader_id = mobj.group(1)
          else:
@@ -674,10 +675,6 @@ class DailymotionIE(InfoExtractor):
      def __init__(self, downloader=None):
          InfoExtractor.__init__(self, downloader)
  
-    def report_download_webpage(self, video_id):
-        """Report webpage download."""
-        self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
-
      def report_extraction(self, video_id):
          """Report information extraction."""
          self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
@@ -696,13 +693,7 @@ class DailymotionIE(InfoExtractor):
          # Retrieve video webpage to extract further information
          request = compat_urllib_request.Request(url)
          request.add_header('Cookie', 'family_filter=off')
-        try:
-            self.report_download_webpage(video_id)
-            webpage_bytes = compat_urllib_request.urlopen(request).read()
-            webpage = webpage_bytes.decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
-            return
+        webpage = self._download_webpage(request, video_id)
  
          # Extract URL, uploader and title from webpage
          self.report_extraction(video_id)
@@ -2820,13 +2811,7 @@ class SoundcloudIE(InfoExtractor):
  
  class InfoQIE(InfoExtractor):
      """Information extractor for infoq.com"""
-
      _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
-    IE_NAME = u'infoq'
-
-    def report_webpage(self, video_id):
-        """Report information extraction."""
-        self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
  
      def report_extraction(self, video_id):
          """Report information extraction."""
@@ -2838,38 +2823,29 @@ class InfoQIE(InfoExtractor):
              self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
              return
  
-        self.report_webpage(url)
-
-        request = compat_urllib_request.Request(url)
-        try:
-            webpage = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
-            return
-
+        webpage = self._download_webpage(url, video_id=url)
          self.report_extraction(url)
  
-
          # Extract video URL
          mobj = re.search(r"jsclassref='([^']*)'", webpage)
          if mobj is None:
              self._downloader.trouble(u'ERROR: unable to extract video url')
              return
-        video_url = 'rtmpe://video.infoq.com/cfx/st/' + compat_urllib_parse.unquote(mobj.group(1).decode('base64'))
-
+        real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
+        video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
  
          # Extract title
          mobj = re.search(r'contentTitle = "(.*?)";', webpage)
          if mobj is None:
              self._downloader.trouble(u'ERROR: unable to extract video title')
              return
-        video_title = mobj.group(1).decode('utf-8')
+        video_title = mobj.group(1)
  
          # Extract description
          video_description = u'No description available.'
          mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage)
          if mobj is not None:
-            video_description = mobj.group(1).decode('utf-8')
+            video_description = mobj.group(1)
  
          video_filename = video_url.split('/')[-1]
          video_id, extension = video_filename.split('.')