[addanime] improve
author Philipp Hagemeister <phihag@phihag.de>
Wed, 28 Aug 2013 02:25:38 +0000 (04:25 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Wed, 28 Aug 2013 02:25:38 +0000 (04:25 +0200)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/common.py
youtube_dl/utils.py

index 2f86f2acaca564f1db652a837dee702507bed8f7,28dcb2cc4ae830d3b1a4debafae02e3a1d66dafa..c76b99a81687e0904bbc147a4e592fd130cfa227
@@@ -1,4 -1,5 +1,5 @@@
 -
 -from .AddAnime import AddAnimeIE
 +from .appletrailers import AppleTrailersIE
++from .addanime import AddAnimeIE
  from .archiveorg import ArchiveOrgIE
  from .ard import ARDIE
  from .arte import ArteTvIE
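
A quick sanity check for the import changes above (a sketch, not part of the commit; it assumes a checkout that already contains both new extractor modules):

    from youtube_dl.extractor import AddAnimeIE, AppleTrailersIE

    # _VALID_URL and the test URL both come from the appletrailers.py hunk
    # further down; suitable() simply applies that regex to the URL.
    assert AppleTrailersIE.suitable('http://trailers.apple.com/trailers/wb/manofsteel/')
    assert not AppleTrailersIE.suitable('http://example.com/')
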
index 7d126e2d24203b60343052acdb362d941fd7d56e,0000000000000000000000000000000000000000..b3bdb2955e974591c1a7afc5b20776261adeb193
mode 100644,000000..100644
--- /dev/null
@@@ -1,167 -1,0 +1,167 @@@
 +import re
 +import xml.etree.ElementTree
 +
 +from .common import InfoExtractor
 +from ..utils import (
 +    determine_ext,
 +    ExtractorError,
 +)
 +
 +
 +class AppleTrailersIE(InfoExtractor):
 +    _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
 +    _TEST = {
 +        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
 +        u"playlist": [
 +            {
 +                u"file": u"manofsteel-trailer4.mov",
 +                u"md5": u"11874af099d480cc09e103b189805d5f",
 +                u"info_dict": {
 +                    u"duration": 111,
 +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
 +                    u"title": u"Trailer 4",
 +                    u"upload_date": u"20130523",
 +                    u"uploader_id": u"wb",
 +                },
 +            },
 +            {
 +                u"file": u"manofsteel-trailer3.mov",
 +                u"md5": u"07a0a262aae5afe68120eed61137ab34",
 +                u"info_dict": {
 +                    u"duration": 182,
 +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
 +                    u"title": u"Trailer 3",
 +                    u"upload_date": u"20130417",
 +                    u"uploader_id": u"wb",
 +                },
 +            },
 +            {
 +                u"file": u"manofsteel-trailer.mov",
 +                u"md5": u"e401fde0813008e3307e54b6f384cff1",
 +                u"info_dict": {
 +                    u"duration": 148,
 +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
 +                    u"title": u"Trailer",
 +                    u"upload_date": u"20121212",
 +                    u"uploader_id": u"wb",
 +                },
 +            },
 +            {
 +                u"file": u"manofsteel-teaser.mov",
 +                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
 +                u"info_dict": {
 +                    u"duration": 93,
 +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
 +                    u"title": u"Teaser",
 +                    u"upload_date": u"20120721",
 +                    u"uploader_id": u"wb",
 +                },
 +            }
 +        ]
 +    }
 +
 +    def _real_extract(self, url):
 +        mobj = re.match(self._VALID_URL, url)
 +        movie = mobj.group('movie')
 +        uploader_id = mobj.group('company')
 +
 +        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
 +        playlist_snippet = self._download_webpage(playlist_url, movie)
 +        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
 +        playlist_html = u'<html>' + playlist_cleaned + u'</html>'
 +
 +        size_cache = {}
 +
 +        doc = xml.etree.ElementTree.fromstring(playlist_html)
 +        playlist = []
 +        for li in doc.findall('./div/ul/li'):
 +            title = li.find('.//h3').text
 +            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
 +            thumbnail = li.find('.//img').attrib['src']
 +
 +            date_el = li.find('.//p')
 +            upload_date = None
 +            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
 +            if m:
 +                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
 +            runtime_el = date_el.find('./br')
 +            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
 +            duration = None
 +            if m:
 +                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
 +
 +            formats = []
-             for formats_el in li.findall('.//li/a'):
++            for formats_el in li.findall('.//a'):
 +                if formats_el.attrib['class'] != 'OverlayPanel':
 +                    continue
 +                target = formats_el.attrib['target']
 +
 +                format_code = formats_el.text
 +                if 'Automatic' in format_code:
 +                    continue
 +
 +                size_q = formats_el.attrib['href']
 +                size_id = size_q.rpartition('#videos-')[2]
 +                if size_id not in size_cache:
 +                    size_url = url + size_q
 +                    sizepage_html = self._download_webpage(
 +                        size_url, movie,
 +                        note=u'Downloading size info %s' % size_id,
 +                        errnote=u'Error while downloading size info %s' % size_id,
 +                    )
 +                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
 +                    size_cache[size_id] = _doc
 +
 +                sizepage_doc = size_cache[size_id]
 +                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
 +                for vid_a in links:
 +                    href = vid_a.get('href')
 +                    if not href.endswith(target):
 +                        continue
 +                    detail_q = href.partition('#')[0]
 +                    detail_url = url + '/' + detail_q
 +
 +                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
 +                    detail_id = m.group('detail_id')
 +
 +                    detail_html = self._download_webpage(
 +                        detail_url, movie,
 +                        note=u'Downloading detail %s %s' % (detail_id, size_id),
 +                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
 +                    )
 +                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
 +                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
 +                    assert movie_link_el.get('class') == 'movieLink'
 +                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
 +                    ext = determine_ext(movie_link)
 +                    assert ext == 'mov'
 +
 +                    formats.append({
 +                        'format': format_code,
 +                        'ext': ext,
 +                        'url': movie_link,
 +                    })
 +
 +            info = {
 +                '_type': 'video',
 +                'id': video_id,
 +                'title': title,
 +                'formats': formats,
 +                'duration': duration,
 +                'thumbnail': thumbnail,
 +                'upload_date': upload_date,
 +                'uploader_id': uploader_id,
 +                'user_agent': 'QuickTime compatible (youtube-dl)',
 +            }
 +            # TODO: Remove when #980 has been merged
 +            info['url'] = formats[-1]['url']
 +            info['ext'] = formats[-1]['ext']
 +
 +            playlist.append(info)
 +
 +        return {
 +            '_type': 'playlist',
 +            'id': movie,
 +            'entries': playlist,
 +        }
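
Not part of the diff: a minimal sketch of driving the new extractor through the usual YoutubeDL entry points (the option dict is an illustrative minimum):

    from youtube_dl import YoutubeDL

    ydl = YoutubeDL({'quiet': True})
    ydl.add_default_info_extractors()
    # download=False runs the extraction only; nothing is written to disk.
    result = ydl.extract_info(
        'http://trailers.apple.com/trailers/wb/manofsteel/', download=False)
    # AppleTrailersIE returns a playlist dict; each entry carries the fields
    # populated above (title, duration, upload_date, uploader_id, ...).
    for entry in result['entries']:
        print('%s: %s (%ss)' % (entry['id'], entry['title'], entry['duration']))
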
index 52c4483c9ecafe914270444b848ba8ac5966f802,da50abfc1cd492b8d360ef601b44841a938c055b..12169b2bb9209dd901d280e02ce10349e1cad6b4
@@@ -129,7 -122,7 +129,7 @@@ class InfoExtractor(object)
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              if errnote is None:
                  errnote = u'Unable to download webpage'
--            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
++            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
  
      def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
          """ Returns a tuple (page content as string, URL handle) """
index 52cfb8a6d18e69be0db213bbf3f4da63f1fcd7d9,59eeaf4a89084783e1ca2607840b3b7dfc4670f5..f78b5fe7889e6493ed8d87046dd7294fb3e3809f
@@@ -60,6 -60,6 +60,11 @@@ try
  except ImportError: # Python 2
      import httplib as compat_http_client
  
++try:
++    from urllib.error import HTTPError as compat_HTTPError
++except ImportError:  # Python 2
++    from urllib2 import HTTPError as compat_HTTPError
++
  try:
      from subprocess import DEVNULL
      compat_subprocess_get_DEVNULL = lambda: DEVNULL
@@@ -489,7 -489,7 +494,7 @@@ def make_HTTPS_handler(opts)
  
  class ExtractorError(Exception):
      """Error during info extraction."""
--    def __init__(self, msg, tb=None, expected=False):
++    def __init__(self, msg, tb=None, expected=False, cause=None):
          """ tb, if given, is the original traceback (so that it can be printed out).
          If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
          """
  
          self.traceback = tb
          self.exc_info = sys.exc_info()  # preserve original exception
++        self.cause = cause
  
      def format_traceback(self):
          if self.traceback is None:
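
Together, compat_HTTPError and the new cause attribute let an extractor recover the original HTTP error after the _request_webpage wrapper has converted it into an ExtractorError, which is presumably what the addanime extractor named in the commit title uses them for. A rough sketch of that pattern (the class name and URL pattern are illustrative, not copied from addanime.py):

    import re

    from .common import InfoExtractor
    from ..utils import compat_HTTPError, ExtractorError


    class ExampleRedirectIE(InfoExtractor):  # hypothetical extractor
        _VALID_URL = r'https?://(?:www\.)?example\.com/watch\?v=(?P<id>\w+)'

        def _real_extract(self, url):
            video_id = re.match(self._VALID_URL, url).group('id')
            try:
                webpage = self._download_webpage(url, video_id)
            except ExtractorError as ee:
                # cause (set in common.py above) keeps the original exception;
                # anything that is not an HTTP error response stays fatal.
                if not isinstance(ee.cause, compat_HTTPError):
                    raise
                # HTTPError doubles as a response object, so the body of the
                # error page can be inspected before continuing.
                webpage = ee.cause.read().decode('utf-8')
            # ... media URL extraction from `webpage` elided in this sketch
            return {
                'id': video_id,
                'title': self._html_search_regex(
                    r'<title>(.+?)</title>', webpage, u'title'),
            }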