Merge remote-tracking branch 'jaimeMF/vevo_fix'

author Philipp Hagemeister <phihag@phihag.de>

Sun, 23 Jun 2013 17:42:27 +0000 (19:42 +0200)

committer Philipp Hagemeister <phihag@phihag.de>

Sun, 23 Jun 2013 17:42:27 +0000 (19:42 +0200)
author Philipp Hagemeister <phihag@phihag.de>
Sun, 23 Jun 2013 17:42:27 +0000 (19:42 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 23 Jun 2013 17:42:27 +0000 (19:42 +0200)
diff --combined youtube_dl/InfoExtractors.py

index f25732bf55f09cf437c2ddca84eb97392615cf5e,af11333d1a6731335d7200e1ff9e1865d37bc629..39d2ef9d4a8b9cfd737e4d5a76850044b6a22917
--- 1/youtube_dl/InfoExtractors.py
--- 2/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@@ -379,6 -379,17 +379,17 @@@ class YoutubeIE(InfoExtractor)
           """Indicate the download will use the RTMP protocol."""
           self.to_screen(u'RTMP download detected')
   
+     @staticmethod
+     def _decrypt_signature(s):
+         """Return the decrypted form of the encrypted signature s.
+ 
+         s must be made of two subkeys of 43 characters each, joined by a
+         single dot. The result is built by rearranging characters taken
+         from both subkeys, joining the two parts with a dot and reversing
+         the whole string.
+ 
+         Raises ExtractorError if s does not have that shape.
+         """
+         subkeys = s.split('.')
+         # Check the number of parts before unpacking: a signature without
+         # exactly one dot would otherwise fail with a bare ValueError.
+         if len(subkeys) != 2 or len(subkeys[0]) != 43 or len(subkeys[1]) != 43:
+             raise ExtractorError(u'Unable to decrypt signature, subkeys lengths not valid')
+         (a, b) = subkeys
+         b = ''.join([b[:8],a[0],b[9:18],b[-4],b[19:39], b[18]])[0:40]
+         a = a[-40:]
+         s_dec = '.'.join((a,b))[::-1]
+         return s_dec
+ 
       def _get_available_subtitles(self, video_id):
           self.report_video_subtitles_download(video_id)
           request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
@@@ -420,7 -431,7 +431,7 @@@
       def _request_automatic_caption(self, video_id, webpage):
           """We need the webpage for getting the captions url, pass it as an
              argument to speed up the process."""
- -        sub_lang = self._downloader.params.get('subtitleslang')
+ +        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
           sub_format = self._downloader.params.get('subtitlesformat')
           self.to_screen(u'%s: Looking for automatic captions' % video_id)
           mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
@@@ -699,14 -710,14 +710,14 @@@
                           pass
                       else:
                           # We report the original error
- -                        self._downloader.report_error(sub_error)
+ +                        self._downloader.report_warning(sub_error)
   
           if self._downloader.params.get('allsubtitles', False):
               video_subtitles = self._extract_all_subtitles(video_id)
               for video_subtitle in video_subtitles:
                   (sub_error, sub_lang, sub) = video_subtitle
                   if sub_error:
- -                    self._downloader.report_error(sub_error)
+ +                    self._downloader.report_warning(sub_error)
   
           if self._downloader.params.get('listsubtitles', False):
               sub_lang_list = self._list_available_subtitles(video_id)
@@@ -724,6 -735,17 +735,17 @@@
           # Decide which formats to download
           req_format = self._downloader.params.get('format', None)
   
+         try:
+             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
+             info = json.loads(mobj.group(1))
+             args = info['args']
+             if args.get('ptk','') == 'vevo' or 'dashmpd':
+                 # Vevo videos with encrypted signatures
+                 self.to_screen(u'Vevo video detected.')
+                 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
+         except ValueError:
+             pass
+ 
           if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
               self.report_rtmp_download()
               video_url_list = [(None, video_info['conn'][0])]
@@@ -735,6 -757,9 +757,9 @@@
                       url = url_data['url'][0]
                       if 'sig' in url_data:
                           url += '&signature=' + url_data['sig'][0]
+                     if 's' in url_data:
+                         signature = self._decrypt_signature(url_data['s'][0])
+                         url += '&signature=' + signature
                       if 'ratebypass' not in url:
                           url += '&ratebypass=yes'
                       url_map[url_data['itag'][0]] = url
@@@ -1096,25 -1121,6 +1121,25 @@@ class VimeoIE(InfoExtractor)
       _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
       IE_NAME = u'vimeo'
   
+ +    def _verify_video_password(self, url, video_id, webpage):
+ +        """Send the password given with --password for a protected video.
+ +
+ +        The xsrft token is read from webpage and posted, together with the
+ +        password, to '<url>/password'. Nothing is returned.
+ +
+ +        Raises ExtractorError if no password was supplied or if the xsrft
+ +        token cannot be found in webpage.
+ +        """
+ +        password = self._downloader.params.get('password', None)
+ +        if password is None:
+ +            raise ExtractorError(u'This video is protected by a password, use the --password option')
+ +        token_match = re.search(r'xsrft: \'(.*?)\'', webpage)
+ +        if token_match is None:
+ +            # Without this check a missing token surfaces as an AttributeError
+ +            raise ExtractorError(u'Unable to extract the xsrft token')
+ +        token = token_match.group(1)
+ +        data = compat_urllib_parse.urlencode({'password': password,
+ +                                              'token': token})
+ +        # I didn't manage to use the password with https
+ +        if url.startswith('https'):
+ +            # Only rewrite the scheme, not a later 'https' inside the URL
+ +            pass_url = url.replace('https', 'http', 1)
+ +        else:
+ +            pass_url = url
+ +        password_request = compat_urllib_request.Request(pass_url+'/password', data)
+ +        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ +        password_request.add_header('Cookie', 'xsrft=%s' % token)
+ +        # The response body is not needed, only the request has to be made
+ +        self._download_webpage(password_request, video_id,
+ +                               u'Verifying the password',
+ +                               u'Wrong password')
+ +
       def _real_extract(self, url, new_video=True):
           # Extract ID from URL
           mobj = re.match(self._VALID_URL, url)
@@@ -1143,10 -1149,6 +1168,10 @@@
           except:
               if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                   raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')
+ +
+ +            if re.search('If so please provide the correct password.', webpage):
+ +                self._verify_video_password(url, video_id, webpage)
+ +                return self._real_extract(url)
               else:
                   raise ExtractorError(u'Unable to extract info section')
   
@@@ -1432,13 -1434,6 +1457,13 @@@ class GenericIE(InfoExtractor)
           if mobj is None:
               # Try to find twitter cards info
               mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
+ +        if mobj is None:
+ +            # We look for Open Graph info:
+ +            # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
+ +            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+ +            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
+ +            if m_video_type is not None:
+ +                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
           if mobj is None:
               raise ExtractorError(u'Invalid URL: %s' % url)
   
@@@ -1639,10 -1634,9 +1664,10 @@@ class YoutubePlaylistIE(InfoExtractor)
                   # Number of videos is a multiple of self._MAX_RESULTS
                   break
   
- -            videos += [ (entry['yt$position']['$t'], entry['content']['src'])
- -                        for entry in response['feed']['entry']
- -                        if 'content' in entry ]
+ +            for entry in response['feed']['entry']:
+ +                index = entry['yt$position']['$t']
+ +                if 'media$group' in entry and 'media$player' in entry['media$group']:
+ +                    videos.append((index, entry['media$group']['media$player']['url']))
   
               if len(response['feed']['entry']) < self._MAX_RESULTS:
                   break
@@@ -4575,37 -4569,6 +4600,37 @@@ class GametrailersIE(InfoExtractor)
                   'description': video_description,
                   }
   
+ +class StatigramIE(InfoExtractor):
+ +    """Information extractor for statigr.am '/p/<id>' pages."""
+ +
+ +    _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
+ +
+ +    def _real_extract(self, url):
+ +        """Return a one-element list with the info dict of the video at url.
+ +
+ +        The video URL and the thumbnail are read from the Open Graph meta
+ +        tags of the page, the title from its <title> element.
+ +        """
+ +        mobj = re.match(self._VALID_URL, url)
+ +
+ +        video_id = mobj.group(1)
+ +        webpage = self._download_webpage(url, video_id)
+ +        video_url = self._html_search_regex(
+ +            r'<meta property="og:video:secure_url" content="(.+?)">',
+ +            webpage, u'video URL')
+ +        thumbnail_url = self._html_search_regex(
+ +            r'<meta property="og:image" content="(.+?)" />',
+ +            webpage, u'thumbnail URL', fatal=False)
+ +        html_title = self._html_search_regex(
+ +            r'<title>(.+?)</title>',
+ +            webpage, u'title')
+ +        title, suffix, _ = html_title.rpartition(u' | Statigram')
+ +        if not suffix:
+ +            # rpartition puts the whole string in the last element when the
+ +            # separator is missing, which would leave the title empty
+ +            title = html_title
+ +        uploader_id = self._html_search_regex(
+ +            r'@([^ ]+)', title, u'uploader name', fatal=False)
+ +        ext = 'mp4'
+ +
+ +        return [{
+ +            'id':        video_id,
+ +            'url':       video_url,
+ +            'ext':       ext,
+ +            'title':     title,
+ +            'thumbnail': thumbnail_url,
+ +            'uploader_id' : uploader_id
+ +        }]
+ +
   def gen_extractors():
       """ Return a list of an instance of every supported extractor.
       The order does matter; the first extractor matched is the one handling the URL.
@@@ -4672,7 -4635,6 +4697,7 @@@
           HypemIE(),
           Vbox7IE(),
           GametrailersIE(),
+ +        StatigramIE(),
           GenericIE()
       ]
author	Philipp Hagemeister <phihag@phihag.de>
	Sun, 23 Jun 2013 17:42:27 +0000 (19:42 +0200)
committer	Philipp Hagemeister <phihag@phihag.de>
	Sun, 23 Jun 2013 17:42:27 +0000 (19:42 +0200)