From: remitamine Date: Thu, 3 Dec 2015 19:05:11 +0000 (+0100) Subject: Merge branch 'master' of https://github.com/rg3/youtube-dl into bilibili X-Git-Url: http://git.bitcoin.ninja/?a=commitdiff_plain;h=640bb54e73779f4a941eae1f17e4be049ca575db;p=youtube-dl Merge branch 'master' of https://github.com/rg3/youtube-dl into bilibili --- 640bb54e73779f4a941eae1f17e4be049ca575db diff --cc youtube_dl/extractor/bilibili.py index 935fcc55c,6c66a1236..1c3644587 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@@ -2,13 -2,16 +2,15 @@@ from __future__ import unicode_literals import re -import itertools import json - import xml.etree.ElementTree as ET from .common import InfoExtractor + from ..compat import ( + compat_etree_fromstring, + ) from ..utils import ( int_or_none, - unified_strdate, + unescapeHTML, ExtractorError, ) @@@ -68,48 -90,55 +70,48 @@@ class BiliBiliIE(InfoExtractor) except ValueError: pass - doc = ET.fromstring(page) - lq_doc = compat_etree_fromstring(lq_page) - lq_durls = lq_doc.findall('./durl') ++ doc = compat_etree_fromstring(page) - hq_doc = self._download_xml( - 'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid, - video_id, - note='Downloading HQ video info', - fatal=False, - ) - if hq_doc is not False: - hq_durls = hq_doc.findall('./durl') - assert len(lq_durls) == len(hq_durls) - else: - hq_durls = itertools.repeat(None) + entries = [] - i = 1 - for lq_durl, hq_durl in zip(lq_durls, hq_durls): + for durl in doc.findall('./durl'): + size = durl.find('./filesize|./size') formats = [{ - 'format_id': 'lq', - 'quality': 1, - 'url': lq_durl.find('./url').text, - 'filesize': int_or_none( - lq_durl.find('./size'), get_attr='text'), + 'url': durl.find('./url').text, + 'filesize': int_or_none(size.text) if size else None, + 'ext': 'flv', }] - if hq_durl is not None: - formats.append({ - 'format_id': 'hq', - 'quality': 2, - 'ext': 'flv', - 'url': hq_durl.find('./url').text, - 'filesize': int_or_none( - hq_durl.find('./size'), get_attr='text'), - }) - self._sort_formats(formats) + backup_urls = durl.find('./backup_url') + if backup_urls is not None: + for backup_url in backup_urls.findall('./url'): + formats.append({'url': backup_url.text}) + formats.reverse() entries.append({ - 'id': '%s_part%d' % (video_id, i), + 'id': '%s_part%s' % (cid, durl.find('./order').text), 'title': title, + 'duration': int_or_none(durl.find('./length').text) // 1000, 'formats': formats, - 'duration': duration, - 'upload_date': upload_date, - 'thumbnail': thumbnail, }) - i += 1 - - return { - '_type': 'multi_video', - 'entries': entries, - 'id': video_id, - 'title': title + info = { + 'id': str(cid), + 'title': title, + 'description': view_data.get('description'), + 'thumbnail': view_data.get('pic'), + 'uploader': view_data.get('author'), + 'timestamp': int_or_none(view_data.get('created')), + 'view_count': view_data.get('play'), + 'duration': int_or_none(doc.find('./timelength').text), } + + if len(entries) == 1: + entries[0].update(info) + return entries[0] + else: + info.update({ + '_type': 'multi_video', + 'id': video_id, + 'entries': entries, + }) + return info