_ Git - youtube-dl/blob - youtube_dl/extractor/cbs.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4
   5
   6 class CBSIE(InfoExtractor):
   7     _VALID_URL = r'https?://(?:(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/(?P<id>[^/]+)/.*'
   8
   9     _TESTS = [{
  10         'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
  11         'info_dict': {
  12             'id': '4JUVEwq3wUT7',
  13             'ext': 'flv',
  14             'title': 'Connect Chat feat. Garth Brooks',
  15             'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
  16             'duration': 1495,
  17         },
  18         'params': {
  19             # rtmp download
  20             'skip_download': True,
  21         },
  22         '_skip': 'Blocked outside the US',
  23     }, {
  24         'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
  25         'info_dict': {
  26             'id': 'WWF_5KqY3PK1',
  27             'ext': 'flv',
  28             'title': 'Live on Letterman - St. Vincent',
  29             'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
  30             'duration': 3221,
  31         },
  32         'params': {
  33             # rtmp download
  34             'skip_download': True,
  35         },
  36         '_skip': 'Blocked outside the US',
  37     }, {
  38         'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
  39         'only_matching': True,
  40     }, {
  41         'url': 'http://colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
  42         'only_matching': True,
  43     }]
  44
  45     def _real_extract(self, url):
  46         video_id = self._match_id(url)
  47         webpage = self._download_webpage(url, video_id)
  48         real_id = self._search_regex(
  49             [r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"],
  50             webpage, 'real video ID')
  51         return self.url_result('theplatform:%s' % real_id)