[njpwworld] Fix extraction.

[youtube-dl] / youtube_dl / extractor / njpwworld.py
diff --git a/youtube_dl/extractor/njpwworld.py b/youtube_dl/extractor/njpwworld.py

index f5e3f6815e6e9fee65a386b8ba93b9e6ba0e4d83..025c5d249c4a0daa2032cf639b64c5c27f2974fb 100644 (file)
--- a/youtube_dl/extractor/njpwworld.py
+++ b/youtube_dl/extractor/njpwworld.py
@@ -6,6 +6,7 @@ import re
  from .common import InfoExtractor
  from ..compat import compat_urlparse
  from ..utils import (
+    extract_attributes,
      get_element_by_class,
      urlencode_postdata,
  )
@@ -30,6 +31,8 @@ class NJPWWorldIE(InfoExtractor):
          'skip': 'Requires login',
      }
  
+    _LOGIN_URL = 'https://front.njpwworld.com/auth/login'
+
      def _real_initialize(self):
          self._login()
  
@@ -39,12 +42,17 @@ class NJPWWorldIE(InfoExtractor):
          if not username:
              return True
  
+        # Setup session (will set necessary cookies)
+        self._request_webpage(
+            'https://njpwworld.com/', None, note='Setting up session')
+
          webpage, urlh = self._download_webpage_handle(
-            'https://njpwworld.com/auth/login', None,
+            self._LOGIN_URL, None,
              note='Logging in', errnote='Unable to login',
-            data=urlencode_postdata({'login_id': username, 'pw': password}))
+            data=urlencode_postdata({'login_id': username, 'pw': password}),
+            headers={'Referer': 'https://front.njpwworld.com/auth'})
          # /auth/login will return 302 for successful logins
-        if urlh.geturl() == 'https://njpwworld.com/auth/login':
+        if urlh.geturl() == self._LOGIN_URL:
              self.report_warning('unable to login')
              return False
  
@@ -56,17 +64,24 @@ class NJPWWorldIE(InfoExtractor):
          webpage = self._download_webpage(url, video_id)
  
          formats = []
-        for player_url, kind in re.findall(r'<a[^>]+href="(/player[^"]+)".+?<img[^>]+src="[^"]+qf_btn_([^".]+)', webpage):
-            player_url = compat_urlparse.urljoin(url, player_url)
-
+        for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
+            player = extract_attributes(mobj.group(0))
+            player_path = player.get('href')
+            if not player_path:
+                continue
+            kind = self._search_regex(
+                r'(low|high)$', player.get('class') or '', 'kind',
+                default='low')
+            player_url = compat_urlparse.urljoin(url, player_path)
              player_page = self._download_webpage(
                  player_url, video_id, note='Downloading player page')
-
              entries = self._parse_html5_media_entries(
                  player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
-                m3u8_entry_protocol='m3u8_native',
-                preference=2 if 'hq' in kind else 1)
-            formats.extend(entries[0]['formats'])
+                m3u8_entry_protocol='m3u8_native')
+            kind_formats = entries[0]['formats']
+            for f in kind_formats:
+                f['quality'] = 2 if kind == 'high' else 1
+            formats.extend(kind_formats)
  
          self._sort_formats(formats)