Merge remote-tracking branch 'upstream/master' into bliptv

[youtube-dl] / youtube_dl / extractor / zdf.py
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py

index ed385450cf28ad25185c0d590bca77c2a9092e0b..a795f56b37bbd710c895b0e255342057a5aa354f 100644 (file)
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -9,6 +9,7 @@ from ..utils import (
      int_or_none,
      unified_strdate,
      OnDemandPagedList,
+    xpath_text,
  )
  
  
@@ -19,13 +20,11 @@ def extract_from_xml_url(ie, video_id, xml_url):
          errnote='Failed to download video info')
  
      title = doc.find('.//information/title').text
-    description = doc.find('.//information/detail').text
-    duration = int(doc.find('.//details/lengthSec').text)
-    uploader_node = doc.find('.//details/originChannelTitle')
-    uploader = None if uploader_node is None else uploader_node.text
-    uploader_id_node = doc.find('.//details/originChannelId')
-    uploader_id = None if uploader_id_node is None else uploader_id_node.text
-    upload_date = unified_strdate(doc.find('.//details/airtime').text)
+    description = xpath_text(doc, './/information/detail', 'description')
+    duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
+    uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
+    uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
+    upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
  
      def xml_to_format(fnode):
          video_url = fnode.find('url').text
@@ -40,15 +39,14 @@ def extract_from_xml_url(ie, video_id, xml_url):
          ext = format_m.group('container')
          proto = format_m.group('proto').lower()
  
-        quality = fnode.find('./quality').text
-        abr = int(fnode.find('./audioBitrate').text) // 1000
-        vbr_node = fnode.find('./videoBitrate')
-        vbr = None if vbr_node is None else int(vbr_node.text) // 1000
+        quality = xpath_text(fnode, './quality', 'quality')
+        abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
+        vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)
  
-        width_node = fnode.find('./width')
-        width = None if width_node is None else int_or_none(width_node.text)
-        height_node = fnode.find('./height')
-        height = None if height_node is None else int_or_none(height_node.text)
+        width = int_or_none(xpath_text(fnode, './width', 'width'))
+        height = int_or_none(xpath_text(fnode, './height', 'height'))
+
+        filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))
  
          format_note = ''
          if not format_note:
@@ -64,26 +62,30 @@ def extract_from_xml_url(ie, video_id, xml_url):
              'vbr': vbr,
              'width': width,
              'height': height,
-            'filesize': int_or_none(fnode.find('./filesize').text),
+            'filesize': filesize,
              'format_note': format_note,
              'protocol': proto,
              '_available': is_available,
          }
  
      def xml_to_thumbnails(fnode):
-        thumbnails = list()
+        thumbnails = []
          for node in fnode:
-            thumbnail = {'url': node.text}
+            thumbnail_url = node.text
+            if not thumbnail_url:
+                continue
+            thumbnail = {
+                'url': thumbnail_url,
+            }
              if 'key' in node.attrib:
-                if re.match("^[0-9]+x[0-9]+$", node.attrib['key']):
-                    thumbnail['width'] = int_or_none(node.attrib['key'].split('x')[0])
-                    thumbnail['height'] = int_or_none(node.attrib['key'].split('x')[1])
+                m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
+                if m:
+                    thumbnail['width'] = int(m.group(1))
+                    thumbnail['height'] = int(m.group(2))
              thumbnails.append(thumbnail)
          return thumbnails
  
-
-    thumbnail_nodes = doc.findall('.//teaserimages/teaserimage')
-    thumbnails = xml_to_thumbnails(thumbnail_nodes)
+    thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
  
      format_nodes = doc.findall('.//formitaeten/formitaet')
      formats = list(filter(