X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fappletrailers.py;h=576f03b5b71115771555e1d8d46f4a108eb9de93;hb=cd5b4b0bc2876e16656d33156754ce3c05aa1619;hp=4359b88d1b7057944beb126eb8a1c82dbb818758;hpb=c71dfccc98208be44b1f639af72a257dae34d966;p=youtube-dl diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 4359b88d1..576f03b5b 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -4,16 +4,19 @@ import re import json from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( - compat_urlparse, int_or_none, ) class AppleTrailersIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P[^/]+)/(?P[^/]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P[^/]+)/(?P[^/]+)' + _TESTS = [{ "url": "http://trailers.apple.com/trailers/wb/manofsteel/", + 'info_dict': { + 'id': 'manofsteel', + }, "playlist": [ { "md5": "d97a8e575432dbcb81b7c3acb741f8a8", @@ -60,7 +63,10 @@ class AppleTrailersIE(InfoExtractor): }, }, ] - } + }, { + 'url': 'http://trailers.apple.com/ca/metropole/autrui/', + 'only_matching': True, + }] _JSON_RE = r'iTunes.playURL\((.*?)\);' @@ -70,15 +76,17 @@ class AppleTrailersIE(InfoExtractor): uploader_id = mobj.group('company') playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') + def fix_html(s): s = re.sub(r'(?s).*?', '', s) s = re.sub(r'', r'', s) # The ' in the onClick attributes are not escaped, it couldn't be parsed # like: http://trailers.apple.com/trailers/wb/gravity/ + def _clean_json(m): return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') s = re.sub(self._JSON_RE, _clean_json, s) - s = '' + s + u'' + s = '%s' % s return s doc = self._download_xml(playlist_url, movie, transform_source=fix_html) @@ -86,7 +94,7 @@ class AppleTrailersIE(InfoExtractor): for li in doc.findall('./div/ul/li'): on_click = li.find('.//a').attrib['onClick'] trailer_info_json = self._search_regex(self._JSON_RE, - on_click, 'trailer info') + on_click, 'trailer info') trailer_info = json.loads(trailer_info_json) title = trailer_info['title'] video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() @@ -120,14 +128,15 @@ class AppleTrailersIE(InfoExtractor): playlist.append({ '_type': 'video', 'id': video_id, - 'title': title, 'formats': formats, 'title': title, 'duration': duration, 'thumbnail': thumbnail, 'upload_date': upload_date, 'uploader_id': uploader_id, - 'user_agent': 'QuickTime compatible (youtube-dl)', + 'http_headers': { + 'User-Agent': 'QuickTime compatible (youtube-dl)', + }, }) return {