]> git.bitcoin.ninja Git - youtube-dl/commitdiff
[bilibili] Add preliminary support (#2174)
authorPhilipp Hagemeister <phihag@phihag.de>
Mon, 21 Apr 2014 11:45:27 +0000 (13:45 +0200)
committerPhilipp Hagemeister <phihag@phihag.de>
Mon, 21 Apr 2014 11:46:41 +0000 (13:46 +0200)
The URL http://www.bilibili.tv/video/av636603/index_2.html does not work yet.

youtube_dl/extractor/__init__.py
youtube_dl/extractor/bilibili.py [new file with mode: 0644]
youtube_dl/extractor/common.py
youtube_dl/utils.py

index 8235d42b6cf45ea79accdf121e7a34e4b7d24d02..2c2e2230f110ee3b3cba6cc775d1e0889c388008 100644 (file)
@@ -20,6 +20,7 @@ from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
+from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
new file mode 100644 (file)
index 0000000..45067b9
--- /dev/null
@@ -0,0 +1,106 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_parse_qs,
+    ExtractorError,
+    int_or_none,
+    unified_strdate,
+)
+
+
+class BiliBiliIE(InfoExtractor):
+    _VALID_URL = r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/'
+
+    _TEST = {
+        'url': 'http://www.bilibili.tv/video/av1074402/',
+        'md5': '2c301e4dab317596e837c3e7633e7d86',
+        'info_dict': {
+            'id': '1074402',
+            'ext': 'flv',
+            'title': '【金坷垃】金泡沫',
+            'duration': 308,
+            'upload_date': '20140420',
+            'thumbnail': 're:^https?://.+\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        video_code = self._search_regex(
+            r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
+
+        title = self._html_search_meta(
+            'media:title', video_code, 'title', fatal=True)
+        duration_str = self._html_search_meta(
+            'duration', video_code, 'duration')
+        if duration_str is None:
+            duration = None
+        else:
+            duration_mobj = re.match(
+                r'^T(?:(?P<hours>[0-9]+)H)?(?P<minutes>[0-9]+)M(?P<seconds>[0-9]+)S$',
+                duration_str)
+            duration = (
+                int_or_none(duration_mobj.group('hours'), default=0) * 3600 +
+                int(duration_mobj.group('minutes')) * 60 +
+                int(duration_mobj.group('seconds')))
+        upload_date = unified_strdate(self._html_search_meta(
+            'uploadDate', video_code, fatal=False))
+        thumbnail = self._html_search_meta(
+            'thumbnailUrl', video_code, 'thumbnail', fatal=False)
+
+        player_params = compat_parse_qs(self._html_search_regex(
+            r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"',
+            webpage, 'player params'))
+
+        if 'cid' in player_params:
+            cid = player_params['cid'][0]
+
+            lq_doc = self._download_xml(
+                'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
+                video_id,
+                note='Downloading LQ video info'
+            )
+            lq_durl = lq_doc.find('.//durl')
+            formats = [{
+                'format_id': 'lq',
+                'quality': 1,
+                'url': lq_durl.find('./url').text,
+                'filesize': int_or_none(
+                    lq_durl.find('./size'), get_attr='text'),
+            }]
+
+            hq_doc = self._download_xml(
+                'http://interface.bilibili.cn/playurl?cid=%s' % cid,
+                video_id,
+                note='Downloading HQ video info',
+                fatal=False,
+            )
+            if hq_doc is not False:
+                hq_durl = hq_doc.find('.//durl')
+                formats.append({
+                    'format_id': 'hq',
+                    'quality': 2,
+                    'ext': 'flv',
+                    'url': hq_durl.find('./url').text,
+                    'filesize': int_or_none(
+                        hq_durl.find('./size'), get_attr='text'),
+                })
+        else:
+            raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
+
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'duration': duration,
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
+        }
index 9653d44eb33db4efb74e97529cdd2aaea0dc1c03..ef02b68966e88d8d1cbc2f9d628e5a79fe8ce3c3 100644 (file)
@@ -279,9 +279,12 @@ class InfoExtractor(object):
 
     def _download_xml(self, url_or_request, video_id,
                       note=u'Downloading XML', errnote=u'Unable to download XML',
 
     def _download_xml(self, url_or_request, video_id,
                       note=u'Downloading XML', errnote=u'Unable to download XML',
-                      transform_source=None):
+                      transform_source=None, fatal=True):
         """Return the xml as an xml.etree.ElementTree.Element"""
         """Return the xml as an xml.etree.ElementTree.Element"""
-        xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        xml_string = self._download_webpage(
+            url_or_request, video_id, note, errnote, fatal=fatal)
+        if xml_string is False:
+            return xml_string
         if transform_source:
             xml_string = transform_source(xml_string)
         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
         if transform_source:
             xml_string = transform_source(xml_string)
         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
index d4df78071663489752a80a1a833662fe5505ec3b..9190a8fb85f7e9cc53c06c9699468502de486033 100644 (file)
@@ -1245,7 +1245,10 @@ class HEADRequest(compat_urllib_request.Request):
         return "HEAD"
 
 
         return "HEAD"
 
 
-def int_or_none(v, scale=1, default=None):
+def int_or_none(v, scale=1, default=None, get_attr=None):
+    if get_attr:
+        if v is not None:
+            v = getattr(v, get_attr, None)
     return default if v is None else (int(v) // scale)
 
 
     return default if v is None else (int(v) // scale)