Use a wrapper around xml.etree.ElementTree.fromstring in Python 2.x (#7178)

[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 0082a4c84606f4f368d6d2075f34a021fb8da72e..52523d7b249210eed88d334c2fefe28cabe78706 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -10,13 +10,11 @@ import re
  import socket
  import sys
  import time
-import xml.etree.ElementTree
  
  from ..compat import (
      compat_cookiejar,
      compat_cookies,
      compat_getpass,
-    compat_HTTPError,
      compat_http_client,
      compat_urllib_error,
      compat_urllib_parse,
@@ -24,6 +22,7 @@ from ..compat import (
      compat_urllib_request,
      compat_urlparse,
      compat_str,
+    compat_etree_fromstring,
  )
  from ..utils import (
      NO_DEFAULT,
@@ -172,6 +171,7 @@ class InfoExtractor(object):
      view_count:     How many users have watched the video on the platform.
      like_count:     Number of positive ratings of the video
      dislike_count:  Number of negative ratings of the video
+    repost_count:   Number of reposts of the video
      average_rating: Average rating give by users, the scale used depends on the webpage
      comment_count:  Number of comments on the video
      comments:       A list of comments, each with one or more of the following
@@ -461,7 +461,7 @@ class InfoExtractor(object):
              return xml_string
          if transform_source:
              xml_string = transform_source(xml_string)
-        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
+        return compat_etree_fromstring(xml_string.encode('utf-8'))
  
      def _download_json(self, url_or_request, video_id,
                         note='Downloading JSON metadata',
@@ -645,8 +645,9 @@ class InfoExtractor(object):
      # Helper functions for extracting OpenGraph info
      @staticmethod
      def _og_regexes(prop):
-        content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))'
-        property_re = r'(?:name|property)=[\'"]?og:%s[\'"]?' % re.escape(prop)
+        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
+        property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
+                       % {'prop': re.escape(prop)})
          template = r'<meta[^>]+?%s[^>]+?%s'
          return [
              template % (property_re, content_re),
@@ -840,7 +841,7 @@ class InfoExtractor(object):
              self._request_webpage(url, video_id, 'Checking %s URL' % item)
              return True
          except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError):
+            if isinstance(e.cause, compat_urllib_error.URLError):
                  self.to_screen(
                      '%s: %s URL is invalid, skipping' % (video_id, item))
                  return False
@@ -942,13 +943,14 @@ class InfoExtractor(object):
              if re.match(r'^https?://', u)
              else compat_urlparse.urljoin(m3u8_url, u))
  
-        m3u8_doc = self._download_webpage(
+        m3u8_doc, urlh = self._download_webpage_handle(
              m3u8_url, video_id,
              note=note or 'Downloading m3u8 information',
              errnote=errnote or 'Failed to download m3u8 information',
              fatal=fatal)
          if m3u8_doc is False:
              return m3u8_doc
+        m3u8_url = urlh.geturl()
          last_info = None
          last_media = None
          kv_rex = re.compile(