Re-enable Dailymotion (tests pass)

[youtube-dl] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index f5e29af9404e7f73b41d2bdfd49ce5f7652648a3..6d8e7be109253306b757d7def8ad142ec7076c01 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -562,12 +562,7 @@ class YoutubeIE(InfoExtractor):
          mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
          if mobj is not None:
              upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
-            format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
-            for expression in format_expressions:
-                try:
-                    upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
-                except:
-                    pass
+            upload_date = unified_strdate(upload_date)
  
          # description
          video_description = get_element_by_id("eow-description", video_webpage)
@@ -688,9 +683,6 @@ class MetacafeIE(InfoExtractor):
      _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
      IE_NAME = u'metacafe'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_disclaimer(self):
          """Report disclaimer retrieval."""
          self.to_screen(u'Retrieving disclaimer')
@@ -793,10 +785,6 @@ class DailymotionIE(InfoExtractor):
  
      _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
      IE_NAME = u'dailymotion'
-    _WORKING = False
-
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
  
      def _real_extract(self, url):
          # Extract id and simplified title from URL
@@ -879,9 +867,6 @@ class PhotobucketIE(InfoExtractor):
      _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
      IE_NAME = u'photobucket'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def _real_extract(self, url):
          # Extract id from URL
          mobj = re.match(self._VALID_URL, url)
@@ -940,9 +925,6 @@ class YahooIE(InfoExtractor):
      _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
      IE_NAME = u'video.yahoo'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def _real_extract(self, url, new_video=True):
          # Extract ID from URL
          mobj = re.match(self._VALID_URL, url)
@@ -1072,9 +1054,6 @@ class VimeoIE(InfoExtractor):
      _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
      IE_NAME = u'vimeo'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def _real_extract(self, url, new_video=True):
          # Extract ID from URL
          mobj = re.match(self._VALID_URL, url)
@@ -1108,7 +1087,10 @@ class VimeoIE(InfoExtractor):
              config = webpage.split(' = {config:')[1].split(',assets:')[0]
              config = json.loads(config)
          except:
-            self._downloader.report_error(u'unable to extract info section')
+            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
+                self._downloader.report_error(u'The author has restricted the access to this video, try with the "--referer" option')
+            else:
+                self._downloader.report_error(u'unable to extract info section')
              return
  
          # Extract title
@@ -1185,9 +1167,6 @@ class ArteTvIE(InfoExtractor):
  
      IE_NAME = u'arte.tv'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def fetch_webpage(self, url):
          request = compat_urllib_request.Request(url)
          try:
@@ -1311,9 +1290,6 @@ class GenericIE(InfoExtractor):
      _VALID_URL = r'.*'
      IE_NAME = u'generic'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_webpage(self, video_id):
          """Report webpage download."""
          if not self._downloader.params.get('test', False):
@@ -1453,9 +1429,6 @@ class YoutubeSearchIE(InfoExtractor):
      _max_youtube_results = 1000
      IE_NAME = u'youtube:search'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_page(self, query, pagenum):
          """Report attempt to download search page with given number."""
          query = query.decode(preferredencoding())
@@ -1530,9 +1503,6 @@ class GoogleSearchIE(InfoExtractor):
      _max_google_results = 1000
      IE_NAME = u'video.google:search'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_page(self, query, pagenum):
          """Report attempt to download playlist page with given number."""
          query = query.decode(preferredencoding())
@@ -1614,9 +1584,6 @@ class YahooSearchIE(InfoExtractor):
      _max_yahoo_results = 1000
      IE_NAME = u'video.yahoo:search'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_page(self, query, pagenum):
          """Report attempt to download playlist page with given number."""
          query = query.decode(preferredencoding())
@@ -1710,9 +1677,6 @@ class YoutubePlaylistIE(InfoExtractor):
      _MAX_RESULTS = 50
      IE_NAME = u'youtube:playlist'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      @classmethod
      def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
@@ -1753,12 +1717,11 @@ class YoutubePlaylistIE(InfoExtractor):
              if 'feed' not in response:
                  self._downloader.report_error(u'Got a malformed response from YouTube API')
                  return
+            playlist_title = response['feed']['title']['$t']
              if 'entry' not in response['feed']:
                  # Number of videos is a multiple of self._MAX_RESULTS
                  break
  
-            playlist_title = response['feed']['title']['$t']
-
              videos += [ (entry['yt$position']['$t'], entry['content']['src'])
                          for entry in response['feed']['entry']
                          if 'content' in entry ]
@@ -1857,9 +1820,6 @@ class YoutubeUserIE(InfoExtractor):
      _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
      IE_NAME = u'youtube:user'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_page(self, username, start_index):
          """Report attempt to download user page."""
          self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
@@ -1926,9 +1886,6 @@ class BlipTVUserIE(InfoExtractor):
      _PAGE_SIZE = 12
      IE_NAME = u'blip.tv:user'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_page(self, username, pagenum):
          """Report attempt to download user page."""
          self.to_screen(u'user %s: Downloading video ids from page %d' %
@@ -2254,9 +2211,6 @@ class MyVideoIE(InfoExtractor):
      _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
      IE_NAME = u'myvideo'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def _real_extract(self,url):
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
@@ -2425,7 +2379,7 @@ class ComedyCentralIE(InfoExtractor):
              shortMediaId = mediaId.split(':')[-1]
              showId = mediaId.split(':')[-2].replace('.com', '')
              officialTitle = itemEl.findall('./title')[0].text
-            officialDate = itemEl.findall('./pubDate')[0].text
+            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
  
              configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
                          compat_urllib_parse.urlencode({'uri': mediaId}))
@@ -2692,9 +2646,6 @@ class SoundcloudIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
      IE_NAME = u'soundcloud'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_resolve(self, video_id):
          """Report information extraction."""
          self.to_screen(u'%s: Resolving id' % video_id)
@@ -2738,12 +2689,13 @@ class SoundcloudIE(InfoExtractor):
  
          streams = json.loads(stream_json)
          mediaURL = streams['http_mp3_128_url']
+        upload_date = unified_strdate(info['created_at'])
  
          return [{
              'id':       info['id'],
              'url':      mediaURL,
              'uploader': info['user']['username'],
-            'upload_date':  info['created_at'],
+            'upload_date': upload_date,
              'title':    info['title'],
              'ext':      u'mp3',
              'description': info['description'],
@@ -2759,10 +2711,7 @@ class SoundcloudSetIE(InfoExtractor):
       """
  
      _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
-    IE_NAME = u'soundcloud'
-
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
+    IE_NAME = u'soundcloud:set'
  
      def report_resolve(self, video_id):
          """Report information extraction."""
@@ -2884,9 +2833,6 @@ class MixcloudIE(InfoExtractor):
      _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
      IE_NAME = u'mixcloud'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_download_json(self, file_id):
          """Report JSON download."""
          self.to_screen(u'Downloading json')
@@ -3334,9 +3280,6 @@ class GooglePlusIE(InfoExtractor):
      _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
      IE_NAME = u'plus.google'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
      def report_extract_entry(self, url):
          """Report downloading extry"""
          self.to_screen(u'Downloading entry: %s' % url)
@@ -3612,6 +3555,7 @@ class FunnyOrDieIE(InfoExtractor):
  
  class SteamIE(InfoExtractor):
      _VALID_URL = r"""http://store.steampowered.com/
+                (agecheck/)?
                  (?P<urltype>video|app)/ #If the page is only for videos or for a game
                  (?P<gameID>\d+)/?
                  (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
@@ -3810,7 +3754,7 @@ class YouPornIE(InfoExtractor):
              self._downloader.report_warning(u'unable to extract video date')
              upload_date = None
          else:
-            upload_date = result.group('date').strip()
+            upload_date = unified_strdate(result.group('date').strip())
  
          # Get the video uploader
          result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
@@ -3917,7 +3861,7 @@ class PornotubeIE(InfoExtractor):
          if result is None:
              self._downloader.report_error(u'unable to extract video title')
              return
-        upload_date = result.group('date')
+        upload_date = unified_strdate(result.group('date'))
  
          info = {'id': video_id,
                  'url': video_url,