[addanime] improve
author Philipp Hagemeister <phihag@phihag.de>
Wed, 28 Aug 2013 02:25:38 +0000 (04:25 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Wed, 28 Aug 2013 02:25:38 +0000 (04:25 +0200)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/common.py
youtube_dl/utils.py

index 2f86f2acaca564f1db652a837dee702507bed8f7,28dcb2cc4ae830d3b1a4debafae02e3a1d66dafa..c76b99a81687e0904bbc147a4e592fd130cfa227
@@@ -1,4 -1,5 +1,5 @@@
 -
 -from .AddAnime import AddAnimeIE
 +from .appletrailers import AppleTrailersIE
++from .addanime import AddAnimeIE
  from .archiveorg import ArchiveOrgIE
  from .ard import ARDIE
  from .arte import ArteTvIE
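
A quick sanity check for the import changes above (a sketch, not part of the commit; it assumes a checkout that already contains both new extractor modules):

    from youtube_dl.extractor import AddAnimeIE, AppleTrailersIE

    # _VALID_URL and the test URL both come from the appletrailers.py hunk
    # further down; suitable() simply applies that regex to the URL.
    assert AppleTrailersIE.suitable('http://trailers.apple.com/trailers/wb/manofsteel/')
    assert not AppleTrailersIE.suitable('http://example.com/')
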
index 7d126e2d24203b60343052acdb362d941fd7d56e,0000000000000000000000000000000000000000..b3bdb2955e974591c1a7afc5b20776261adeb193
mode 100644,000000..100644
--- /dev/null
@@@ -1,167 -1,0 +1,167 @@@
 +import re
 +import xml.etree.ElementTree
 +
 +from .common import InfoExtractor
 +from ..utils import (
 +    determine_ext,
 +    ExtractorError,
 +)
 +
 +
 +class AppleTrailersIE(InfoExtractor):
 +    _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
 +    _TEST = {
 +        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
 +        u"playlist": [
 +            {
 +                u"file": u"manofsteel-trailer4.mov",
 +                u"md5": u"11874af099d480cc09e103b189805d5f",
 +                u"info_dict": {
 +                    u"duration": 111,
 +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
 +                    u"title": u"Trailer 4",
 +                    u"upload_date": u"20130523",
 +                    u"uploader_id": u"wb",
 +                },
 +            },
 +            {
 +                u"file": u"manofsteel-trailer3.mov",
 +                u"md5": u"07a0a262aae5afe68120eed61137ab34",
 +                u"info_dict": {
 +                    u"duration": 182,
 +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
 +                    u"title": u"Trailer 3",
 +                    u"upload_date": u"20130417",
 +                    u"uploader_id": u"wb",
 +                },
 +            },
 +            {
 +                u"file": u"manofsteel-trailer.mov",
 +                u"md5": u"e401fde0813008e3307e54b6f384cff1",
 +                u"info_dict": {
 +                    u"duration": 148,
 +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
 +                    u"title": u"Trailer",
 +                    u"upload_date": u"20121212",
 +                    u"uploader_id": u"wb",
 +                },
 +            },
 +            {
 +                u"file": u"manofsteel-teaser.mov",
 +                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
 +                u"info_dict": {
 +                    u"duration": 93,
 +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
 +                    u"title": u"Teaser",
 +                    u"upload_date": u"20120721",
 +                    u"uploader_id": u"wb",
 +                },
 +            }
 +        ]
 +    }
 +
 +    def _real_extract(self, url):
 +        mobj = re.match(self._VALID_URL, url)
 +        movie = mobj.group('movie')
 +        uploader_id = mobj.group('company')
 +
 +        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
 +        playlist_snippet = self._download_webpage(playlist_url, movie)
 +        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
 +        playlist_html = u'<html>' + playlist_cleaned + u'</html>'
 +
 +        size_cache = {}
 +
 +        doc = xml.etree.ElementTree.fromstring(playlist_html)
 +        playlist = []
 +        for li in doc.findall('./div/ul/li'):
 +            title = li.find('.//h3').text
 +            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
 +            thumbnail = li.find('.//img').attrib['src']
 +
 +            date_el = li.find('.//p')
 +            upload_date = None
 +            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
 +            if m:
 +                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
 +            runtime_el = date_el.find('./br')
 +            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
 +            duration = None
 +            if m:
 +                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
 +
 +            formats = []
-             for formats_el in li.findall('.//li/a'):
++            for formats_el in li.findall('.//a'):
 +                if formats_el.attrib['class'] != 'OverlayPanel':
 +                    continue
 +                target = formats_el.attrib['target']
 +
 +                format_code = formats_el.text
 +                if 'Automatic' in format_code:
 +                    continue
 +
 +                size_q = formats_el.attrib['href']
 +                size_id = size_q.rpartition('#videos-')[2]
 +                if size_id not in size_cache:
 +                    size_url = url + size_q
 +                    sizepage_html = self._download_webpage(
 +                        size_url, movie,
 +                        note=u'Downloading size info %s' % size_id,
 +                        errnote=u'Error while downloading size info %s' % size_id,
 +                    )
 +                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
 +                    size_cache[size_id] = _doc
 +
 +                sizepage_doc = size_cache[size_id]
 +                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
 +                for vid_a in links:
 +                    href = vid_a.get('href')
 +                    if not href.endswith(target):
 +                        continue
 +                    detail_q = href.partition('#')[0]
 +                    detail_url = url + '/' + detail_q
 +
 +                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
 +                    detail_id = m.group('detail_id')
 +
 +                    detail_html = self._download_webpage(
 +                        detail_url, movie,
 +                        note=u'Downloading detail %s %s' % (detail_id, size_id),
 +                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
 +                    )
 +                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
 +                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
 +                    assert movie_link_el.get('class') == 'movieLink'
 +                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
 +                    ext = determine_ext(movie_link)
 +                    assert ext == 'mov'
 +
 +                    formats.append({
 +                        'format': format_code,
 +                        'ext': ext,
 +                        'url': movie_link,
 +                    })
 +
 +            info = {
 +                '_type': 'video',
 +                'id': video_id,
 +                'title': title,
 +                'formats': formats,
 +                'duration': duration,
 +                'thumbnail': thumbnail,
 +                'upload_date': upload_date,
 +                'uploader_id': uploader_id,
 +                'user_agent': 'QuickTime compatible (youtube-dl)',
 +            }
 +            # TODO: Remove when #980 has been merged
 +            info['url'] = formats[-1]['url']
 +            info['ext'] = formats[-1]['ext']
 +
 +            playlist.append(info)
 +
 +        return {
 +            '_type': 'playlist',
 +            'id': movie,
 +            'entries': playlist,
 +        }
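
Not part of the diff: a minimal sketch of driving the new extractor through the usual YoutubeDL entry points (the option dict is an illustrative minimum):

    from youtube_dl import YoutubeDL

    ydl = YoutubeDL({'quiet': True})
    ydl.add_default_info_extractors()
    # download=False runs the extraction only; nothing is written to disk.
    result = ydl.extract_info(
        'http://trailers.apple.com/trailers/wb/manofsteel/', download=False)
    # AppleTrailersIE returns a playlist dict; each entry carries the fields
    # populated above (title, duration, upload_date, uploader_id, ...).
    for entry in result['entries']:
        print('%s: %s (%ss)' % (entry['id'], entry['title'], entry['duration']))
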
index 52c4483c9ecafe914270444b848ba8ac5966f802,da50abfc1cd492b8d360ef601b44841a938c055b..12169b2bb9209dd901d280e02ce10349e1cad6b4
@@@ -129,7 -122,7 +129,7 @@@ class InfoExtractor(object)
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              if errnote is None:
                  errnote = u'Unable to download webpage'
--            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
++            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
  
      def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
          """ Returns a tuple (page content as string, URL handle) """
index 52cfb8a6d18e69be0db213bbf3f4da63f1fcd7d9,59eeaf4a89084783e1ca2607840b3b7dfc4670f5..f78b5fe7889e6493ed8d87046dd7294fb3e3809f
@@@ -60,6 -60,6 +60,11 @@@ try
  except ImportError: # Python 2
      import httplib as compat_http_client
  
++try:
++    from urllib.error import HTTPError as compat_HTTPError
++except ImportError:  # Python 2
++    from urllib2 import HTTPError as compat_HTTPError
++
  try:
      from subprocess import DEVNULL
      compat_subprocess_get_DEVNULL = lambda: DEVNULL
@@@ -489,7 -489,7 +494,7 @@@ def make_HTTPS_handler(opts)
  
  class ExtractorError(Exception):
      """Error during info extraction."""
--    def __init__(self, msg, tb=None, expected=False):
++    def __init__(self, msg, tb=None, expected=False, cause=None):
          """ tb, if given, is the original traceback (so that it can be printed out).
          If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
          """
  
          self.traceback = tb
          self.exc_info = sys.exc_info()  # preserve original exception
++        self.cause = cause
  
      def format_traceback(self):
          if self.traceback is None:
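
Together, compat_HTTPError and the new cause attribute let an extractor recover the original HTTP error after the _request_webpage wrapper has converted it into an ExtractorError, which is presumably what the addanime extractor named in the commit title uses them for. A rough sketch of that pattern (the class name and URL pattern are illustrative, not copied from addanime.py):

    import re

    from .common import InfoExtractor
    from ..utils import compat_HTTPError, ExtractorError


    class ExampleRedirectIE(InfoExtractor):  # hypothetical extractor
        _VALID_URL = r'https?://(?:www\.)?example\.com/watch\?v=(?P<id>\w+)'

        def _real_extract(self, url):
            video_id = re.match(self._VALID_URL, url).group('id')
            try:
                webpage = self._download_webpage(url, video_id)
            except ExtractorError as ee:
                # cause (set in common.py above) keeps the original exception;
                # anything that is not an HTTP error response stays fatal.
                if not isinstance(ee.cause, compat_HTTPError):
                    raise
                # HTTPError doubles as a response object, so the body of the
                # error page can be inspected before continuing.
                webpage = ee.cause.read().decode('utf-8')
            # ... media URL extraction from `webpage` elided in this sketch
            return {
                'id': video_id,
                'title': self._html_search_regex(
                    r'<title>(.+?)</title>', webpage, u'title'),
            }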