Merge pull request #5588 from aajanki/encode_frag_filenames

[youtube-dl] / youtube_dl / extractor / twitch.py
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py

index b7a72a7bdccf74bc3027e0930d262d5c173a4aec..023911c41f4b05ef0a8137e9cf8fede2cc94cf6d 100644 (file)
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -7,12 +7,17 @@ import random
  
  from .common import InfoExtractor
  from ..compat import (
+    compat_parse_qs,
      compat_str,
      compat_urllib_parse,
+    compat_urllib_parse_urlparse,
      compat_urllib_request,
+    compat_urlparse,
  )
  from ..utils import (
      ExtractorError,
+    int_or_none,
+    parse_duration,
      parse_iso8601,
  )
  
@@ -23,7 +28,7 @@ class TwitchBaseIE(InfoExtractor):
      _API_BASE = 'https://api.twitch.tv'
      _USHER_BASE = 'http://usher.twitch.tv'
      _LOGIN_URL = 'https://secure.twitch.tv/login'
-    _LOGIN_POST_URL = 'https://passport.twitch.tv/authorize'
+    _LOGIN_POST_URL = 'https://passport.twitch.tv/authentications/new'
      _NETRC_MACHINE = 'twitch'
  
      def _handle_error(self, response):
@@ -59,26 +64,35 @@ class TwitchBaseIE(InfoExtractor):
          login_page = self._download_webpage(
              self._LOGIN_URL, None, 'Downloading login page')
  
-        login_form = dict(re.findall(
-            r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
-            login_page))
+        login_form = self._hidden_inputs(login_page)
  
          login_form.update({
-            'login': username,
-            'password': password,
+            'login': username.encode('utf-8'),
+            'password': password.encode('utf-8'),
          })
  
+        post_url = self._search_regex(
+            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+            'post url', default=self._LOGIN_POST_URL, group='url')
+
+        if not post_url.startswith('http'):
+            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
          request = compat_urllib_request.Request(
-            self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+            post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
          request.add_header('Referer', self._LOGIN_URL)
          response = self._download_webpage(
              request, None, 'Logging in as %s' % username)
  
-        m = re.search(
-            r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
-        if m:
+        error_message = self._search_regex(
+            r'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>',
+            response, 'error message', default=None)
+        if error_message:
              raise ExtractorError(
-                'Unable to login: %s' % m.group('msg').strip(), expected=True)
+                'Unable to login. Twitch said: %s' % error_message, expected=True)
+
+        if '>Reset your password<' in response:
+            self.report_warning('Twitch asks you to reset your password, go to https://secure.twitch.tv/reset/submit')
  
      def _prefer_source(self, formats):
          try:
@@ -127,14 +141,14 @@ class TwitchItemBaseIE(TwitchBaseIE):
      def _extract_info(self, info):
          return {
              'id': info['_id'],
-            'title': info['title'],
-            'description': info['description'],
-            'duration': info['length'],
-            'thumbnail': info['preview'],
-            'uploader': info['channel']['display_name'],
-            'uploader_id': info['channel']['name'],
-            'timestamp': parse_iso8601(info['recorded_at']),
-            'view_count': info['views'],
+            'title': info.get('title') or 'Untitled Broadcast',
+            'description': info.get('description'),
+            'duration': int_or_none(info.get('length')),
+            'thumbnail': info.get('preview'),
+            'uploader': info.get('channel', {}).get('display_name'),
+            'uploader_id': info.get('channel', {}).get('name'),
+            'timestamp': parse_iso8601(info.get('recorded_at')),
+            'view_count': int_or_none(info.get('views')),
          }
  
      def _real_extract(self, url):
@@ -182,8 +196,8 @@ class TwitchVodIE(TwitchItemBaseIE):
      _ITEM_TYPE = 'vod'
      _ITEM_SHORTCUT = 'v'
  
-    _TEST = {
-        'url': 'http://www.twitch.tv/riotgames/v/6528877',
+    _TESTS = [{
+        'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
          'info_dict': {
              'id': 'v6528877',
              'ext': 'mp4',
@@ -195,12 +209,32 @@ class TwitchVodIE(TwitchItemBaseIE):
              'uploader': 'Riot Games',
              'uploader_id': 'riotgames',
              'view_count': int,
+            'start_time': 310,
          },
          'params': {
              # m3u8 download
              'skip_download': True,
          },
-    }
+    }, {
+        # Untitled broadcast (title is None)
+        'url': 'http://www.twitch.tv/belkao_o/v/11230755',
+        'info_dict': {
+            'id': 'v11230755',
+            'ext': 'mp4',
+            'title': 'Untitled Broadcast',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 1638,
+            'timestamp': 1439746708,
+            'upload_date': '20150816',
+            'uploader': 'BelkAO_o',
+            'uploader_id': 'belkao_o',
+            'view_count': int,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }]
  
      def _real_extract(self, url):
          item_id = self._match_id(url)
@@ -214,6 +248,12 @@ class TwitchVodIE(TwitchItemBaseIE):
              item_id, 'mp4')
          self._prefer_source(formats)
          info['formats'] = formats
+
+        parsed_url = compat_urllib_parse_urlparse(url)
+        query = compat_parse_qs(parsed_url.query)
+        if 't' in query:
+            info['start_time'] = parse_duration(query['t'][0])
+
          return info
  
  
@@ -308,9 +348,9 @@ class TwitchBookmarksIE(TwitchPlaylistBaseIE):
  
  class TwitchStreamIE(TwitchBaseIE):
      IE_NAME = 'twitch:stream'
-    _VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+    _VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
  
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.twitch.tv/shroomztv',
          'info_dict': {
              'id': '12772022048',
@@ -329,7 +369,10 @@ class TwitchStreamIE(TwitchBaseIE):
              # m3u8 download
              'skip_download': True,
          },
-    }
+    }, {
+        'url': 'http://www.twitch.tv/miracle_doto#profile-0',
+        'only_matching': True,
+    }]
  
      def _real_extract(self, url):
          channel_id = self._match_id(url)
@@ -344,6 +387,12 @@ class TwitchStreamIE(TwitchBaseIE):
                  'http://www.twitch.tv/%s/profile' % channel_id,
                  'TwitchProfile', channel_id)
  
+        # Channel name may be typed in a different case than the original channel name
+        # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON), which would lead to constructing
+        # an invalid m3u8 URL. Work around this by using the original channel name from the
+        # stream JSON, falling back to lowercase if it's not available.
+        channel_id = stream.get('channel', {}).get('name') or channel_id.lower()
+
          access_token = self._download_json(
              '%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id,
              'Downloading channel access token')