projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
bec2c14
)
[WDR] extract jsonp-url by parsing data-extension of mediaLink
author
Boris Wachtmeister
<boris-code@gmx.com>
Thu, 26 May 2016 17:08:12 +0000
(19:08 +0200)
committer
Boris Wachtmeister
<boris-code@gmx.com>
Thu, 26 May 2016 18:54:51 +0000
(20:54 +0200)
youtube_dl/extractor/wdr.py
patch
|
blob
|
history
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index fddcbf190767ab97450708467a2c67dd1ac13f92..dd107ef8af3f6eda9b3fde413db3429d06c5a87a 100644
(file)
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -10,6 +10,7 @@
from ..compat import (
)
from ..utils import (
determine_ext,
)
from ..utils import (
determine_ext,
+ js_to_json,
strip_jsonp,
unified_strdate,
ExtractorError,
strip_jsonp,
unified_strdate,
ExtractorError,
@@ -21,8 +22,6 @@
class WDRIE(InfoExtractor):
_PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
_VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
_PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
_VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
- _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)'
-
_TESTS = [
{
'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html',
_TESTS = [
{
'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html',
@@ -102,9 +101,13 @@
class WDRIE(InfoExtractor):
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
- js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None)
+ # for wdr.de the data-extension is in a tag with the class "mediaLink"
+ # for wdrmaus its in a link to the page in a multiline "videoLink"-tag
+ json_metadata = self._html_search_regex(
+ r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
+ webpage, 'media link', default=None, flags=re.MULTILINE)
- if not js_url:
+ if not json_metadata:
entries = [
self.url_result(page_url + href[0], 'WDR')
for href in re.findall(
entries = [
self.url_result(page_url + href[0], 'WDR')
for href in re.findall(
@@ -117,8 +120,12 @@
class WDRIE(InfoExtractor):
raise ExtractorError('No downloadable streams found', expected=True)
raise ExtractorError('No downloadable streams found', expected=True)
+ media_link_obj = self._parse_json(json_metadata, display_id,
+ transform_source=js_to_json)
+ jsonp_url = media_link_obj['mediaObj']['url']
+
metadata = self._download_json(
metadata = self._download_json(
- js_url, 'metadata', transform_source=strip_jsonp)
+ jsonp_url, 'metadata', transform_source=strip_jsonp)
metadata_tracker_data = metadata['trackerData']
metadata_media_resource = metadata['mediaResource']
metadata_tracker_data = metadata['trackerData']
metadata_media_resource = metadata['mediaResource']