Use a wrapper around xml.etree.ElementTree.fromstring in Python 2.x (#7178)

[youtube-dl] / youtube_dl / extractor / bilibili.py
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py

index a8bea2c10f36c264a168a33aaae59634e6b930eb..6c66a12368ea0a963d89ef5922c9d83f3019ddfc 100644 (file)
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -3,8 +3,12 @@ from __future__ import unicode_literals
  
  import re
  import itertools
+import json
  
  from .common import InfoExtractor
+from ..compat import (
+    compat_etree_fromstring,
+)
  from ..utils import (
      int_or_none,
      unified_strdate,
@@ -39,8 +43,15 @@ class BiliBiliIE(InfoExtractor):
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)
  
-        if self._search_regex(r'(此视频不存在或被删除)', webpage, 'error message', default=None):
-            raise ExtractorError('The video does not exist or was deleted', expected=True)
+        if '(此视频不存在或被删除)' in webpage:
+            raise ExtractorError(
+                'The video does not exist or was deleted', expected=True)
+
+        if '>你没有权限浏览！ 由于版权相关问题 我们不对您所在的地区提供服务<' in webpage:
+            raise ExtractorError(
+                'The video is not available in your region due to copyright reasons',
+                expected=True)
+
          video_code = self._search_regex(
              r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
  
@@ -67,11 +78,19 @@ class BiliBiliIE(InfoExtractor):
  
          entries = []
  
-        lq_doc = self._download_xml(
+        lq_page = self._download_webpage(
              'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
              video_id,
              note='Downloading LQ video info'
          )
+        try:
+            err_info = json.loads(lq_page)
+            raise ExtractorError(
+                'BiliBili said: ' + err_info['error_text'], expected=True)
+        except ValueError:
+            pass
+
+        lq_doc = compat_etree_fromstring(lq_page)
          lq_durls = lq_doc.findall('./durl')
  
          hq_doc = self._download_xml(
@@ -95,7 +114,7 @@ class BiliBiliIE(InfoExtractor):
                  'filesize': int_or_none(
                      lq_durl.find('./size'), get_attr='text'),
              }]
-            if hq_durl:
+            if hq_durl is not None:
                  formats.append({
                      'format_id': 'hq',
                      'quality': 2,