Merge branch 'playtvak' of https://github.com/oskar456/youtube-dl into oskar456-playtvak

[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 717dcec7b8e39156ad5cc33aadf64ac16e92c47a..65835d257197361a7ea3e5159b37de6f03ec62ad 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -15,6 +15,7 @@ import xml.etree.ElementTree
  from ..compat import (
      compat_cookiejar,
      compat_cookies,
+    compat_getpass,
      compat_HTTPError,
      compat_http_client,
      compat_urllib_error,
@@ -39,6 +40,8 @@ from ..utils import (
      sanitize_filename,
      unescapeHTML,
      url_basename,
+    xpath_text,
+    xpath_with_ns,
  )
  
  
@@ -202,8 +205,8 @@ class InfoExtractor(object):
      There must be a key "entries", which is a list, an iterable, or a PagedList
      object, each element of which is a valid dictionary by this specification.
  
-    Additionally, playlists can have "title" and "id" attributes with the same
-    semantics as videos (see above).
+    Additionally, playlists can have "title", "description" and "id" attributes
+    with the same semantics as videos (see above).
  
  
      _type "multi_video" indicates that there are multiple videos that
@@ -608,7 +611,7 @@ class InfoExtractor(object):
  
          return (username, password)
  
-    def _get_tfa_info(self):
+    def _get_tfa_info(self, note='two-factor verification code'):
          """
          Get the two-factor authentication info
          TODO - asking the user will be required for sms/phone verify
@@ -622,7 +625,7 @@ class InfoExtractor(object):
          if downloader_params.get('twofactor', None) is not None:
              return downloader_params['twofactor']
  
-        return None
+        return compat_getpass('Type %s and press [Return]: ' % note)
  
      # Helper functions for extracting OpenGraph info
      @staticmethod
@@ -638,7 +641,7 @@ class InfoExtractor(object):
      @staticmethod
      def _meta_regex(prop):
+        """
+        Build the regex source used to find a <meta> tag describing prop.
+
+        The pattern requires, somewhere inside the tag, one of
+        itemprop=, name=, property=, id= or http-equiv= followed by prop
+        (regex-escaped, optionally wrapped in matching quotes), and a quoted
+        content= attribute whose value is captured as the named group
+        "content". The inline flags make it case-insensitive, let "." match
+        newlines and enable verbose mode.
+        """
          return r'''(?isx)<meta
-                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
+                    (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
                      [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
  
      def _og_search_property(self, prop, html, name=None, **kargs):
@@ -722,16 +725,18 @@ class InfoExtractor(object):
  
      @staticmethod
      def _hidden_inputs(html):
-        return dict([
-            (input.group('name'), input.group('value')) for input in re.finditer(
-                r'''(?x)
-                    <input\s+
-                        type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
-                        name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
-                        (?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
-                        value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
-                ''', html)
-        ])
+        """
+        Collect the hidden form fields found in html.
+
+        Returns a dict mapping the name attribute of every hidden <input>
+        tag to its value attribute. Each attribute is searched for
+        independently inside the tag, so their order does not matter.
+        If the same name occurs more than once, the last occurrence wins.
+        """
+        hidden_inputs = {}
+        # "input" here is the raw attribute text between "<input" and ">".
+        for input in re.findall(r'<input([^>]+)>', html):
+            # Only a quoted type="hidden" / type='hidden' is recognised.
+            if not re.search(r'type=(["\'])hidden\1', input):
+                continue
+            # Tags lacking a quoted, non-empty name are skipped.
+            name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
+            if not name:
+                continue
+            # The value may be empty, but the attribute itself must be present.
+            value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
+            if not value:
+                continue
+            hidden_inputs[name.group('value')] = value.group('value')
+        return hidden_inputs
  
      def _form_hidden_inputs(self, form_id, html):
          form = self._search_regex(
@@ -1142,6 +1147,49 @@ class InfoExtractor(object):
              })
          return subtitles
  
+    def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
+        """
+        Download an XSPF playlist and return its tracks as info dicts.
+
+        playlist_url -- URL of the XSPF document
+        playlist_id  -- id passed to _download_xml and on to _parse_xspf
+        fatal        -- forwarded unchanged to _download_xml
+
+        Returns the list built by _parse_xspf, or an empty list when
+        _download_xml returns False.
+        """
+        xspf = self._download_xml(
+            playlist_url, playlist_id, 'Downloading xspf playlist',
+            'Unable to download xspf manifest', fatal=fatal)
+        # A False result means the download did not yield a document.
+        if xspf is False:
+            return []
+        return self._parse_xspf(xspf, playlist_id)
+
+    def _parse_xspf(self, playlist, playlist_id):
+        """
+        Build one info dict per <track> of an already parsed XSPF document.
+
+        playlist    -- root element of the XSPF document
+        playlist_id -- used as the "id" of every entry and as the default
+                       title for tracks without a <title>
+
+        Returns a list of dicts with the keys id, title, description,
+        thumbnail, duration and formats.
+        """
+        namespaces = {
+            'xspf': 'http://xspf.org/ns/0/',
+            's1': 'http://static.streamone.nl/player/ns/0',
+        }
+
+        def ns(path):
+            # Shorthand for xpath_with_ns with the namespace map above.
+            return xpath_with_ns(path, namespaces)
+
+        results = []
+        for track in playlist.findall(ns('./xspf:trackList/xspf:track')):
+            title = xpath_text(
+                track, ns('./xspf:title'), 'title', default=playlist_id)
+            description = xpath_text(
+                track, ns('./xspf:annotation'), 'description')
+            thumbnail = xpath_text(track, ns('./xspf:image'), 'thumbnail')
+            raw_duration = xpath_text(
+                track, ns('./xspf:duration'), 'duration')
+            # NOTE(review): presumably milliseconds scaled to seconds - confirm
+            duration = float_or_none(raw_duration, 1000)
+
+            # One format per <location>; label/width/height are read from
+            # attributes in the "s1" namespace.
+            formats = []
+            for location in track.findall(ns('./xspf:location')):
+                formats.append({
+                    'url': location.text,
+                    'format_id': location.get(ns('s1:label')),
+                    'width': int_or_none(location.get(ns('s1:width'))),
+                    'height': int_or_none(location.get(ns('s1:height'))),
+                })
+            self._sort_formats(formats)
+
+            entry = {
+                'id': playlist_id,
+                'title': title,
+                'description': description,
+                'thumbnail': thumbnail,
+                'duration': duration,
+                'formats': formats,
+            }
+            results.append(entry)
+        return results
+
      def _live_title(self, name):
          """ Generate the title for a live video """
          now = datetime.datetime.now()