[airmozilla] Add new extractor
[youtube-dl] / youtube_dl / extractor / appletrailers.py
index 4359b88d1b7057944beb126eb8a1c82dbb818758..43e82847ff8eb472bdac0a91c2138220f001b3b6 100644 (file)
@@ -4,8 +4,8 @@ import re
 import json
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
-    compat_urlparse,
     int_or_none,
 )
 
@@ -14,6 +14,9 @@ class AppleTrailersIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
     _TEST = {
         "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
+        'info_dict': {
+            'id': 'manofsteel',
+        },
         "playlist": [
             {
                 "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
@@ -70,15 +73,17 @@ class AppleTrailersIE(InfoExtractor):
         uploader_id = mobj.group('company')
 
         playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
+
         def fix_html(s):
             s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
             s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
             # The ' in the onClick attributes are not escaped, it couldn't be parsed
             # like: http://trailers.apple.com/trailers/wb/gravity/
+
             def _clean_json(m):
                 return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
             s = re.sub(self._JSON_RE, _clean_json, s)
-            s = '<html>' + s + u'</html>'
+            s = '<html>%s</html>' % s
             return s
         doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
 
@@ -86,7 +91,7 @@ class AppleTrailersIE(InfoExtractor):
         for li in doc.findall('./div/ul/li'):
             on_click = li.find('.//a').attrib['onClick']
             trailer_info_json = self._search_regex(self._JSON_RE,
-                on_click, 'trailer info')
+                                                   on_click, 'trailer info')
             trailer_info = json.loads(trailer_info_json)
             title = trailer_info['title']
             video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
@@ -120,14 +125,15 @@ class AppleTrailersIE(InfoExtractor):
             playlist.append({
                 '_type': 'video',
                 'id': video_id,
-                'title': title,
                 'formats': formats,
                 'title': title,
                 'duration': duration,
                 'thumbnail': thumbnail,
                 'upload_date': upload_date,
                 'uploader_id': uploader_id,
-                'user_agent': 'QuickTime compatible (youtube-dl)',
+                'http_headers': {
+                    'User-Agent': 'QuickTime compatible (youtube-dl)',
+                },
             })
 
         return {