projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
[corus] Add new extractor(closes #12060)(#9164)
[youtube-dl]
/
youtube_dl
/
extractor
/
theplatform.py
diff --git
a/youtube_dl/extractor/theplatform.py
b/youtube_dl/extractor/theplatform.py
index 108ddd3a9e76b041e6d770d77f79c2ce3568d637..5c5987c6a95ab04bd9f43540dc63abc7b13e2932 100644
(file)
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@
-1,4
+1,4
@@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
from __future__ import unicode_literals
import re
@@
-9,7
+9,7
@@
import hashlib
from .once import OnceIE
from .once import OnceIE
-from .adobepass import AdobePass
+from .adobepass import AdobePassIE
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
@@
-33,7
+33,9
@@
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
class ThePlatformBaseIE(OnceIE):
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
class ThePlatformBaseIE(OnceIE):
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
- meta = self._download_xml(smil_url, video_id, note=note, query={'format': 'SMIL'})
+ meta = self._download_xml(
+ smil_url, video_id, note=note, query={'format': 'SMIL'},
+ headers=self.geo_verification_headers())
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
if error_element is not None and error_element.attrib['src'].startswith(
'http://link.theplatform.com/s/errorFiles/Unavailable.'):
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
if error_element is not None and error_element.attrib['src'].startswith(
'http://link.theplatform.com/s/errorFiles/Unavailable.'):
@@
-73,10
+75,10
@@
class ThePlatformBaseIE(OnceIE):
if isinstance(captions, list):
for caption in captions:
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
if isinstance(captions, list):
for caption in captions:
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
- subtitles[lang] = [{
+ subtitles.setdefault(lang, []).append({
'ext': mimetype2ext(mime),
'url': src,
'ext': mimetype2ext(mime),
'url': src,
- }]
+ })
return {
'title': info['title'],
return {
'title': info['title'],
@@
-93,10
+95,10
@@
class ThePlatformBaseIE(OnceIE):
return self._parse_theplatform_metadata(info)
return self._parse_theplatform_metadata(info)
-class ThePlatformIE(ThePlatformBaseIE, AdobePass):
+class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
- (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
+ (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|theplatform:)(?P<id>[^/\?&]+)'''
_TESTS = [{
|theplatform:)(?P<id>[^/\?&]+)'''
_TESTS = [{
@@
-116,6
+118,7
@@
class ThePlatformIE(ThePlatformBaseIE, AdobePass):
# rtmp download
'skip_download': True,
},
# rtmp download
'skip_download': True,
},
+ 'skip': '404 Not Found',
}, {
# from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
}, {
# from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
@@
-153,7
+156,7
@@
class ThePlatformIE(ThePlatformBaseIE, AdobePass):
'title': 'iPhone Siri’s sassy response to a math question has people talking',
'description': 'md5:a565d1deadd5086f3331d57298ec6333',
'duration': 83.0,
'title': 'iPhone Siri’s sassy response to a math question has people talking',
'description': 'md5:a565d1deadd5086f3331d57298ec6333',
'duration': 83.0,
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1435752600,
'upload_date': '20150701',
'uploader': 'NBCU-NEWS',
'timestamp': 1435752600,
'upload_date': '20150701',
'uploader': 'NBCU-NEWS',
@@
-164,7
+167,6
@@
class ThePlatformIE(ThePlatformBaseIE, AdobePass):
'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
'only_matching': True,
}]
'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
'only_matching': True,
}]
- _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
@classmethod
def _extract_urls(cls, webpage):
@classmethod
def _extract_urls(cls, webpage):
@@
-295,7
+297,7
@@
class ThePlatformFeedIE(ThePlatformBaseIE):
'ext': 'mp4',
'title': 'The Biden factor: will Joe run in 2016?',
'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
'ext': 'mp4',
'title': 'The Biden factor: will Joe run in 2016?',
'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140208',
'timestamp': 1391824260,
'duration': 467.0,
'upload_date': '20140208',
'timestamp': 1391824260,
'duration': 467.0,
@@
-304,9
+306,10
@@
class ThePlatformFeedIE(ThePlatformBaseIE):
},
}]
},
}]
- def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
+ def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
entry = self._download_json(real_url, video_id)['entries'][0]
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
entry = self._download_json(real_url, video_id)['entries'][0]
+ main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
formats = []
subtitles = {}
formats = []
subtitles = {}
@@
-331,7
+334,7
@@
class ThePlatformFeedIE(ThePlatformBaseIE):
if asset_type in asset_types_query:
query.update(asset_types_query[asset_type])
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
if asset_type in asset_types_query:
query.update(asset_types_query[asset_type])
cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
- smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
+ main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
formats.extend(cur_formats)
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
formats.extend(cur_formats)
subtitles = self._merge_subtitles(subtitles, cur_subtitles)