Merge remote-tracking branch 'rzhxeo/embedly'
author Philipp Hagemeister <phihag@phihag.de>
Wed, 5 Mar 2014 13:01:53 +0000 (14:01 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Wed, 5 Mar 2014 13:01:53 +0000 (14:01 +0100)
Conflicts:
youtube_dl/extractor/generic.py

1  2 
youtube_dl/extractor/generic.py

index 641d9babb5cea29ff753fe30ab038db84481db37,e84c022a587cf7cae22f3d6ec33fdbf15f07098b..dd60bc418e2214c2c71f2f41a5e629a9ccc74192
@@@ -83,10 -83,10 +83,10 @@@ class GenericIE(InfoExtractor)
          # Direct link to a video
          {
              'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
 -            'file': 'trailer.mp4',
              'md5': '67d406c2bcb6af27fa886f31aa934bbe',
              'info_dict': {
                  'id': 'trailer',
 +                'ext': 'mp4',
                  'title': 'trailer',
                  'upload_date': '20100513',
              }
@@@ -94,6 -94,7 +94,6 @@@
          # ooyala video
          {
              'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
 -            'file': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ.mp4',
              'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
              'info_dict': {
                  'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
                  'title': '2cc213299525360.mov',  # that's what we get
              },
          },
-         }
 +        # google redirect
 +        {
 +            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 +            'info_dict': {
 +                'id': 'cmQHVoWB5FY',
 +                'ext': 'mp4',
 +                'upload_date': '20130224',
 +                'uploader_id': 'TheVerge',
 +                'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
 +                'uploader': 'The Verge',
 +                'title': 'First Firefox OS phones side-by-side',
 +            },
 +            'params': {
 +                'skip_download': False,
 +            }
++        },
+         # embed.ly video
+         {
+             'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
+             'info_dict': {
+                 'id': '9ODmcdjQcHQ',
+                 'ext': 'mp4',
+             },
+             # No need to test YoutubeIE here
+             'params': {
+                 'skip_download': True,
+             },
+         },
      ]
  
      def report_download_webpage(self, video_id):
          if mobj is not None:
              return self.url_result(mobj.group(1), 'Mpora')
  
 -        # Look for embedded Novamov player
 +        # Look for embedded NovaMov player
          mobj = re.search(
              r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
          if mobj is not None:
 -            return self.url_result(mobj.group('url'), 'Novamov')
 +            return self.url_result(mobj.group('url'), 'NovaMov')
 +
 +        # Look for embedded NowVideo player
 +        mobj = re.search(
 +            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
 +        if mobj is not None:
 +            return self.url_result(mobj.group('url'), 'NowVideo')
  
          # Look for embedded Facebook player
          mobj = re.search(
          if mobj is not None:
              return self.url_result(mobj.group('url'), 'Facebook')
  
 +        # Look for embedded VK player
 +        mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
 +        if mobj is not None:
 +            return self.url_result(mobj.group('url'), 'VK')
 +
          # Look for embedded Huffington Post player
          mobj = re.search(
              r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
          if mobj is not None:
              return self.url_result(mobj.group('url'), 'HuffPost')
  
+         # Look for embed.ly
+         mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
+         if mobj is not None:
+             return self.url_result(mobj.group('url'))
+         mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
+         if mobj is not None:
+             return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
          # Start with something easy: JW Player in SWFObject
          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
          if mobj is None:
          if mobj is None:
              # HTML5 video
              mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
 +        if mobj is None:
 +            mobj = re.search(
 +                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
 +                r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
 +                webpage)
 +            if mobj:
 +                new_url = mobj.group(1)
 +                self.report_following_redirect(new_url)
 +                return {
 +                    '_type': 'url',
 +                    'url': new_url,
 +                }
          if mobj is None:
              raise ExtractorError('Unsupported URL: %s' % url)