working - worldstarhiphop IE

[youtube-dl] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 021579ce01120dffa25e8d58ca54edb8ab72ac19..f69bad4f3a77d49be2d8b9cac3c6c256f4ee9898 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -74,13 +74,15 @@ class InfoExtractor(object):
          self._ready = False
          self.set_downloader(downloader)
  
-    def suitable(self, url):
+    @classmethod
+    def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(self._VALID_URL, url) is not None
+        return re.match(cls._VALID_URL, url) is not None
  
-    def working(self):
+    @classmethod
+    def working(cls):
          """Getter method for _WORKING."""
-        return self._WORKING
+        return cls._WORKING
  
      def initialize(self):
          """Initializes an instance (authentication, etc)."""
@@ -137,7 +139,6 @@ class YoutubeIE(InfoExtractor):
                           (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                              tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
-                         (?!view_play_list|my_playlists|artist|playlist)      # ignore playlist URLs
                           (?:                                                  # the various things that can precede the ID:
                               (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                               |(?:                                             # or the v= param in all its forms
@@ -189,9 +190,11 @@ class YoutubeIE(InfoExtractor):
      }
      IE_NAME = u'youtube'
  
-    def suitable(self, url):
+    @classmethod
+    def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+        if YoutubePlaylistIE.suitable(url): return False
+        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def report_lang(self):
          """Report attempt to set language."""
@@ -305,7 +308,7 @@ class YoutubeIE(InfoExtractor):
                  else:
                      raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
              except (IOError, netrc.NetrcParseError) as err:
-                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
+                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
                  return
  
          # Set language
@@ -314,7 +317,7 @@ class YoutubeIE(InfoExtractor):
              self.report_lang()
              compat_urllib_request.urlopen(request).read()
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
+            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
              return
  
          # No authentication to be performed
@@ -325,7 +328,7 @@ class YoutubeIE(InfoExtractor):
          try:
              login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.to_stderr(u'WARNING: unable to fetch login page: %s' % compat_str(err))
+            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
              return
  
          galx = None
@@ -369,10 +372,10 @@ class YoutubeIE(InfoExtractor):
              self.report_login()
              login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
              if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
-                self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
+                self._downloader.report_warning(u'unable to log in: bad username or password')
                  return
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
+            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
              return
  
          # Confirm age
@@ -1453,7 +1456,7 @@ class YoutubeSearchIE(InfoExtractor):
                      self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                      return
                  elif n > self._max_youtube_results:
-                    self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
+                    self._downloader.report_warning(u'ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
                      n = self._max_youtube_results
                  self._download_n_results(query, n)
                  return
@@ -1479,6 +1482,10 @@ class YoutubeSearchIE(InfoExtractor):
                  return
              api_response = json.loads(data)['data']
  
+            if not 'items' in api_response:
+                self._downloader.trouble(u'[youtube] No video results')
+                return
+
              new_ids = list(video['id'] for video in api_response['items'])
              video_ids += new_ids
  
@@ -1531,7 +1538,7 @@ class GoogleSearchIE(InfoExtractor):
                      self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                      return
                  elif n > self._max_google_results:
-                    self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
+                    self._downloader.report_warning(u'gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
                      n = self._max_google_results
                  self._download_n_results(query, n)
                  return
@@ -1615,7 +1622,7 @@ class YahooSearchIE(InfoExtractor):
                      self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                      return
                  elif n > self._max_yahoo_results:
-                    self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
+                    self._downloader.report_warning(u'yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
                      n = self._max_yahoo_results
                  self._download_n_results(query, n)
                  return
@@ -1668,17 +1675,17 @@ class YoutubePlaylistIE(InfoExtractor):
                          (?:\w+\.)?
                          youtube\.com/
                          (?:
-                           (?:course|view_play_list|my_playlists|artist|playlist)
-                           \? .*? (p|a|list)=
+                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
+                           \? (?:.*?&)*? (?:p|a|list)=
                          |  user/.*?/user/
                          |  p/
                          |  user/.*?#[pg]/c/
                          )
-                        (?:PL|EC)?
-                     |PL|EC)
-                     ([0-9A-Za-z-_]{10,})
-                     (?:/.*?/([0-9A-Za-z_-]+))?
-                     .*"""
+                        ((?:PL|EC|UU)?[0-9A-Za-z-_]{10,})
+                        .*
+                     |
+                        ((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
+                     )"""
      _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json'
      _MAX_RESULTS = 50
      IE_NAME = u'youtube:playlist'
@@ -1686,9 +1693,10 @@ class YoutubePlaylistIE(InfoExtractor):
      def __init__(self, downloader=None):
          InfoExtractor.__init__(self, downloader)
  
-    def suitable(self, url):
+    @classmethod
+    def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def report_download_page(self, playlist_id, pagenum):
          """Report attempt to download playlist page with given number."""
@@ -1701,13 +1709,8 @@ class YoutubePlaylistIE(InfoExtractor):
              self._downloader.trouble(u'ERROR: invalid url: %s' % url)
              return
  
-        # Single video case
-        if mobj.group(3) is not None:
-            self._downloader.download([mobj.group(3)])
-            return
-
          # Download playlist videos from API
-        playlist_id = mobj.group(2)
+        playlist_id = mobj.group(1) or mobj.group(2)
          page_num = 1
          videos = []
  
@@ -1727,14 +1730,18 @@ class YoutubePlaylistIE(InfoExtractor):
                  self._downloader.trouble(u'ERROR: Invalid JSON in API response: ' + compat_str(err))
                  return
  
-            videos += [(entry['yt$position']['$t'], entry['content']['src']) for entry in response['feed']['entry']]
+            if not 'feed' in response or not 'entry' in response['feed']:
+                self._downloader.trouble(u'ERROR: Got a malformed response from YouTube API')
+                return
+            videos += [ (entry['yt$position']['$t'], entry['content']['src'])
+                        for entry in response['feed']['entry']
+                        if 'content' in entry ]
  
              if len(response['feed']['entry']) < self._MAX_RESULTS:
                  break
              page_num += 1
  
-        videos = map(operator.itemgetter(1), sorted(videos))
-
+        videos = [v[1] for v in sorted(videos)]
          total = len(videos)
  
          playliststart = self._downloader.params.get('playliststart', 1) - 1
@@ -2073,7 +2080,7 @@ class FacebookIE(InfoExtractor):
                  else:
                      raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
              except (IOError, netrc.NetrcParseError) as err:
-                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
+                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
                  return
  
          if useremail is None:
@@ -2090,10 +2097,10 @@ class FacebookIE(InfoExtractor):
              self.report_login()
              login_results = compat_urllib_request.urlopen(request).read()
              if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
-                self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
+                self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                  return
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
+            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
              return
  
      def _real_extract(self, url):
@@ -2158,6 +2165,17 @@ class BlipTVIE(InfoExtractor):
              self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
              return
  
+        urlp = compat_urllib_parse_urlparse(url)
+        if urlp.path.startswith('/play/'):
+            request = compat_urllib_request.Request(url)
+            response = compat_urllib_request.urlopen(request)
+            redirecturl = response.geturl()
+            rurlp = compat_urllib_parse_urlparse(redirecturl)
+            file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
+            url = 'http://blip.tv/a/a-' + file_id
+            return self._real_extract(url)
+
+
          if '?' in url:
              cchar = '&'
          else:
@@ -2313,9 +2331,10 @@ class ComedyCentralIE(InfoExtractor):
          '400': '384x216',
      }
  
-    def suitable(self, url):
+    @classmethod
+    def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def report_extraction(self, episode_id):
          self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
@@ -2538,7 +2557,7 @@ class EscapistIE(InfoExtractor):
              'uploader': showName,
              'upload_date': None,
              'title': showName,
-            'ext': 'flv',
+            'ext': 'mp4',
              'thumbnail': imgUrl,
              'description': description,
              'player_url': playerUrl,
@@ -3572,55 +3591,6 @@ class FunnyOrDieIE(InfoExtractor):
          }
          return [info]
  
-class TweetReelIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?tweetreel\.com/[?](?P<id>[0-9a-z]+)$'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
-            return
-
-        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-
-        m = re.search(r'<div id="left" status_id="([0-9]+)">', webpage)
-        if not m:
-            self._downloader.trouble(u'ERROR: Cannot find status ID')
-        status_id = m.group(1)
-
-        m = re.search(r'<div class="tweet_text">(.*?)</div>', webpage, flags=re.DOTALL)
-        if not m:
-            self._downloader.trouble(u'WARNING: Cannot find description')
-        desc = unescapeHTML(re.sub('<a.*?</a>', '', m.group(1))).strip()
-
-        m = re.search(r'<div class="tweet_info">.*?from <a target="_blank" href="https?://twitter.com/(?P<uploader_id>.+?)">(?P<uploader>.+?)</a>', webpage, flags=re.DOTALL)
-        if not m:
-            self._downloader.trouble(u'ERROR: Cannot find uploader')
-        uploader = unescapeHTML(m.group('uploader'))
-        uploader_id = unescapeHTML(m.group('uploader_id'))
-
-        m = re.search(r'<span unixtime="([0-9]+)"', webpage)
-        if not m:
-            self._downloader.trouble(u'ERROR: Cannot find upload date')
-        upload_date = datetime.datetime.fromtimestamp(int(m.group(1))).strftime('%Y%m%d')
-
-        title = desc
-        video_url = 'http://files.tweetreel.com/video/' + status_id + '.mov'
-
-        info = {
-            'id': video_id,
-            'url': video_url,
-            'ext': 'mov',
-            'title': title,
-            'description': desc,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'internal_id': status_id,
-            'upload_date': upload_date
-        }
-        return [info]
-
  class SteamIE(InfoExtractor):
      _VALID_URL = r"""http://store.steampowered.com/
                  (?P<urltype>video|app)/ #If the page is only for videos or for a game
@@ -3628,9 +3598,10 @@ class SteamIE(InfoExtractor):
                  (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
                  """
  
-    def suitable(self, url):
+    @classmethod
+    def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def _real_extract(self, url):
          m = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -3683,6 +3654,62 @@ class UstreamIE(InfoExtractor):
                    }
          return [info]
  
+class WorldStarHipHopIE(InfoExtractor):
+    """Information extractor for worldstarhiphop.com and worldstarcandy.com."""
+    _VALID_URL = r'http://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
+    IE_NAME = u'WorldStarHipHop'
+
+    def _real_extract(self, url):
+        """Return a one-element list holding the info dict for the video at url.
+
+        Reports an error through the downloader and returns None when url does
+        not match _VALID_URL or when no media URL is found in the page.
+        """
+        m = re.match(self._VALID_URL, url)
+        if m is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+        video_id = m.group('id')
+
+        webpage_src = compat_urllib_request.urlopen(str(url)).read()
+        webpage_src = webpage_src.decode('utf-8')
+
+        # Media URL: starts with http://hw-videos and runs to the last mp4/flv on that line.
+        mobj = re.search(r"""(http://hw-videos.*(?:mp4|flv))""", webpage_src)
+        if mobj is None:
+            # Without a media URL there is nothing to download; do not hand
+            # an info dict with 'url': None back to the caller.
+            self._downloader.trouble(u'ERROR: unable to extract video URL')
+            return
+        video_url = mobj.group()
+        ext = 'mp4' if 'mp4' in video_url else 'flv'
+
+        # Use the page <title>; fall back to a timestamped placeholder title.
+        mobj = re.search(r"""<title>(.*)</title>""", webpage_src)
+        if mobj is not None:
+            title = mobj.group(1)
+        else:
+            title = 'World Start Hip Hop - %s' % time.ctime()
+
+        mobj = re.search(r"""rel="image_src" href="(.*)" />""", webpage_src)
+        if mobj is not None:
+            thumbnail = mobj.group(1)
+        else:
+            # No thumbnail link found: the page is presumably a WSHH candy
+            # video, whose title is taken from the "candytitles" span if present.
+            thumbnail = None
+            mobj = re.search(r"""candytitles.*>(.*)</span>""", webpage_src)
+            if mobj is not None:
+                title = mobj.group(1)
+
+        return [{
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'ext': ext,
+        }]
+
  class RBMARadioIE(InfoExtractor):
      _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
  
@@ -3758,7 +3785,7 @@ class YouPornIE(InfoExtractor):
          # Get the video date
          result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage)
          if result is None:
-            self._downloader.to_stderr(u'WARNING: unable to extract video date')
+            self._downloader.report_warning(u'unable to extract video date')
              upload_date = None
          else:
              upload_date = result.group('date').strip()
@@ -3766,7 +3793,7 @@ class YouPornIE(InfoExtractor):
          # Get the video uploader
          result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
          if result is None:
-            self._downloader.to_stderr(u'WARNING: unable to extract uploader')
+            self._downloader.report_warning(u'unable to extract uploader')
              video_uploader = None
          else:
              video_uploader = result.group('uploader').strip()
@@ -4004,9 +4031,10 @@ class TEDIE(InfoExtractor):
                     /(?P<name>\w+) # Here goes the name and then ".html"
                     '''
  
-    def suitable(self, url):
+    @classmethod
+    def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def _real_extract(self, url):
          m=re.match(self._VALID_URL, url, re.VERBOSE)
@@ -4161,9 +4189,9 @@ def gen_extractors():
          GooglePlusIE(),
          ArteTvIE(),
          NBAIE(),
+        WorldStarHipHopIE(),
          JustinTVIE(),
          FunnyOrDieIE(),
-        TweetReelIE(),
          SteamIE(),
          UstreamIE(),
          RBMARadioIE(),