Merge remote-tracking branch 'jaimeMF/vevo_fix'
author Philipp Hagemeister <phihag@phihag.de>
Sun, 23 Jun 2013 17:42:27 +0000 (19:42 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 23 Jun 2013 17:42:27 +0000 (19:42 +0200)
1  2 
youtube_dl/InfoExtractors.py

index f25732bf55f09cf437c2ddca84eb97392615cf5e,af11333d1a6731335d7200e1ff9e1865d37bc629..39d2ef9d4a8b9cfd737e4d5a76850044b6a22917
@@@ -379,6 -379,17 +379,17 @@@ class YoutubeIE(InfoExtractor)
          """Indicate the download will use the RTMP protocol."""
          self.to_screen(u'RTMP download detected')
  
+     @staticmethod
+     def _decrypt_signature(s):
+         """Decrypt the key the two subkeys must have a length of 43"""
+         (a,b) = s.split('.')
+         if len(a) != 43 or len(b) != 43:
+             raise ExtractorError(u'Unable to decrypt signature, subkeys lengths not valid')
+         b = ''.join([b[:8],a[0],b[9:18],b[-4],b[19:39], b[18]])[0:40]
+         a = a[-40:]
+         s_dec = '.'.join((a,b))[::-1]
+         return s_dec
      def _get_available_subtitles(self, video_id):
          self.report_video_subtitles_download(video_id)
          request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
      def _request_automatic_caption(self, video_id, webpage):
          """We need the webpage for getting the captions url, pass it as an
             argument to speed up the process."""
 -        sub_lang = self._downloader.params.get('subtitleslang')
 +        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
          sub_format = self._downloader.params.get('subtitlesformat')
          self.to_screen(u'%s: Looking for automatic captions' % video_id)
          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
                          pass
                      else:
                          # We report the original error
 -                        self._downloader.report_error(sub_error)
 +                        self._downloader.report_warning(sub_error)
  
          if self._downloader.params.get('allsubtitles', False):
              video_subtitles = self._extract_all_subtitles(video_id)
              for video_subtitle in video_subtitles:
                  (sub_error, sub_lang, sub) = video_subtitle
                  if sub_error:
 -                    self._downloader.report_error(sub_error)
 +                    self._downloader.report_warning(sub_error)
  
          if self._downloader.params.get('listsubtitles', False):
              sub_lang_list = self._list_available_subtitles(video_id)
          # Decide which formats to download
          req_format = self._downloader.params.get('format', None)
  
+         try:
+             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
+             info = json.loads(mobj.group(1))
+             args = info['args']
+             if args.get('ptk','') == 'vevo' or 'dashmpd':
+                 # Vevo videos with encrypted signatures
+                 self.to_screen(u'Vevo video detected.')
+                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
+         except ValueError:
+             pass
          if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
              self.report_rtmp_download()
              video_url_list = [(None, video_info['conn'][0])]
                      url = url_data['url'][0]
                      if 'sig' in url_data:
                          url += '&signature=' + url_data['sig'][0]
+                     if 's' in url_data:
+                         signature = self._decrypt_signature(url_data['s'][0])
+                         url += '&signature=' + signature
                      if 'ratebypass' not in url:
                          url += '&ratebypass=yes'
                      url_map[url_data['itag'][0]] = url
@@@ -1096,25 -1121,6 +1121,25 @@@ class VimeoIE(InfoExtractor)
      _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
      IE_NAME = u'vimeo'
  
 +    def _verify_video_password(self, url, video_id, webpage):
 +        password = self._downloader.params.get('password', None)
 +        if password is None:
 +            raise ExtractorError(u'This video is protected by a password, use the --password option')
 +        token = re.search(r'xsrft: \'(.*?)\'', webpage).group(1)
 +        data = compat_urllib_parse.urlencode({'password': password,
 +                                              'token': token})
 +        # I didn't manage to use the password with https
 +        if url.startswith('https'):
 +            pass_url = url.replace('https','http')
 +        else:
 +            pass_url = url
 +        password_request = compat_urllib_request.Request(pass_url+'/password', data)
 +        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 +        password_request.add_header('Cookie', 'xsrft=%s' % token)
 +        pass_web = self._download_webpage(password_request, video_id,
 +                                          u'Verifying the password',
 +                                          u'Wrong password')
 +
      def _real_extract(self, url, new_video=True):
          # Extract ID from URL
          mobj = re.match(self._VALID_URL, url)
          except:
              if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                  raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')
 +
 +            if re.search('If so please provide the correct password.', webpage):
 +                self._verify_video_password(url, video_id, webpage)
 +                return self._real_extract(url)
              else:
                  raise ExtractorError(u'Unable to extract info section')
  
@@@ -1432,13 -1434,6 +1457,13 @@@ class GenericIE(InfoExtractor)
          if mobj is None:
              # Try to find twitter cards info
              mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
 +        if mobj is None:
 +            # We look for Open Graph info:
 +            # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
 +            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
 +            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
 +            if m_video_type is not None:
 +                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)
  
@@@ -1639,10 -1634,9 +1664,10 @@@ class YoutubePlaylistIE(InfoExtractor)
                  # Number of videos is a multiple of self._MAX_RESULTS
                  break
  
 -            videos += [ (entry['yt$position']['$t'], entry['content']['src'])
 -                        for entry in response['feed']['entry']
 -                        if 'content' in entry ]
 +            for entry in response['feed']['entry']:
 +                index = entry['yt$position']['$t']
 +                if 'media$group' in entry and 'media$player' in entry['media$group']:
 +                    videos.append((index, entry['media$group']['media$player']['url']))
  
              if len(response['feed']['entry']) < self._MAX_RESULTS:
                  break
@@@ -4575,37 -4569,6 +4600,37 @@@ class GametrailersIE(InfoExtractor)
                  'description': video_description,
                  }
  
 +class StatigramIE(InfoExtractor):
 +    _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
 +
 +    def _real_extract(self, url):
 +        mobj = re.match(self._VALID_URL, url)
 +
 +        video_id = mobj.group(1)
 +        webpage = self._download_webpage(url, video_id)
 +        video_url = self._html_search_regex(
 +            r'<meta property="og:video:secure_url" content="(.+?)">',
 +            webpage, u'video URL')
 +        thumbnail_url = self._html_search_regex(
 +            r'<meta property="og:image" content="(.+?)" />',
 +            webpage, u'thumbnail URL', fatal=False)
 +        html_title = self._html_search_regex(
 +            r'<title>(.+?)</title>',
 +            webpage, u'title')
 +        title = html_title.rpartition(u' | Statigram')[0]
 +        uploader_id = self._html_search_regex(
 +            r'@([^ ]+)', title, u'uploader name', fatal=False)
 +        ext = 'mp4'
 +
 +        return [{
 +            'id':        video_id,
 +            'url':       video_url,
 +            'ext':       ext,
 +            'title':     title,
 +            'thumbnail': thumbnail_url,
 +            'uploader_id' : uploader_id
 +        }]
 +
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
      The order does matter; the first extractor matched is the one handling the URL.
          HypemIE(),
          Vbox7IE(),
          GametrailersIE(),
 +        StatigramIE(),
          GenericIE()
      ]