[addanime] improve

author Philipp Hagemeister <phihag@phihag.de>

Wed, 28 Aug 2013 02:25:38 +0000 (04:25 +0200)

committer Philipp Hagemeister <phihag@phihag.de>

Wed, 28 Aug 2013 02:25:38 +0000 (04:25 +0200)
author Philipp Hagemeister <phihag@phihag.de>
Wed, 28 Aug 2013 02:25:38 +0000 (04:25 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Wed, 28 Aug 2013 02:25:38 +0000 (04:25 +0200)
diff --combined youtube_dl/extractor/__init__.py

index 2f86f2acaca564f1db652a837dee702507bed8f7,28dcb2cc4ae830d3b1a4debafae02e3a1d66dafa..c76b99a81687e0904bbc147a4e592fd130cfa227
--- 1/youtube_dl/extractor/__init__.py
--- 2/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@@ -1,4 -1,5 +1,5 @@@
- -
- -from .AddAnime import AddAnimeIE
+ +from .appletrailers import AppleTrailersIE
++from .addanime import AddAnimeIE
   from .archiveorg import ArchiveOrgIE
   from .ard import ARDIE
   from .arte import ArteTvIE
@@@ -7,10 -8,7 +8,10 @@@ from .bandcamp import BandcampI
   from .bliptv import BlipTVIE, BlipTVUserIE
   from .breakcom import BreakIE
   from .brightcove import BrightcoveIE
+ +from .c56 import C56IE
   from .canalplus import CanalplusIE
+ +from .canalc2 import Canalc2IE
+ +from .cnn import CNNIE
   from .collegehumor import CollegeHumorIE
   from .comedycentral import ComedyCentralIE
   from .condenast import CondeNastIE
@@@ -33,7 -31,6 +34,7 @@@ from .gametrailers import GametrailersI
   from .generic import GenericIE
   from .googleplus import GooglePlusIE
   from .googlesearch import GoogleSearchIE
+ +from .hark import HarkIE
   from .hotnewhiphop import HotNewHipHopIE
   from .howcast import HowcastIE
   from .hypem import HypemIE
@@@ -41,7 -38,6 +42,7 @@@ from .ign import IGNIE, OneUPI
   from .ina import InaIE
   from .infoq import InfoQIE
   from .instagram import InstagramIE
+ +from .jeuxvideo import JeuxVideoIE
   from .jukebox import JukeboxIE
   from .justintv import JustinTVIE
   from .kankan import KankanIE
@@@ -55,19 -51,14 +56,19 @@@ from .muzu import MuzuTVI
   from .myspass import MySpassIE
   from .myvideo import MyVideoIE
   from .nba import NBAIE
+ +from .nbc import NBCNewsIE
   from .ooyala import OoyalaIE
+ +from .pbs import PBSIE
   from .photobucket import PhotobucketIE
   from .pornotube import PornotubeIE
   from .rbmaradio import RBMARadioIE
   from .redtube import RedTubeIE
   from .ringtv import RingTVIE
+ +from .ro220 import Ro220IE
   from .roxwel import RoxwelIE
+ +from .rtlnow import RTLnowIE
   from .sina import SinaIE
+ +from .slashdot import SlashdotIE
   from .soundcloud import SoundcloudIE, SoundcloudSetIE
   from .spiegel import SpiegelIE
   from .stanfordoc import StanfordOpenClassroomIE
@@@ -78,11 -69,9 +79,11 @@@ from .ted import TEDI
   from .tf1 import TF1IE
   from .thisav import ThisAVIE
   from .traileraddict import TrailerAddictIE
+ +from .trilulilu import TriluliluIE
   from .tudou import TudouIE
   from .tumblr import TumblrIE
   from .tutv import TutvIE
+ +from .unistra import UnistraIE
   from .ustream import UstreamIE
   from .vbox7 import Vbox7IE
   from .veoh import VeohIE
@@@ -90,6 -79,7 +91,6 @@@ from .vevo import VevoI
   from .videofyme import VideofyMeIE
   from .vimeo import VimeoIE, VimeoChannelIE
   from .vine import VineIE
- -from .c56 import C56IE
   from .wat import WatIE
   from .weibo import WeiboIE
   from .wimp import WimpIE
@@@ -123,14 -113,12 +124,14 @@@ _ALL_CLASSES = 
   ]
   _ALL_CLASSES.append(GenericIE)
   
+ +
   def gen_extractors():
       """ Return a list of an instance of every supported extractor.
       The order does matter; the first extractor matched is the one handling the URL.
       """
       return [klass() for klass in _ALL_CLASSES]
   
+ +
   def get_info_extractor(ie_name):
       """Returns the info extractor class with the given ie_name"""
       return globals()[ie_name+'IE']
diff --combined youtube_dl/extractor/appletrailers.py

index 7d126e2d24203b60343052acdb362d941fd7d56e,0000000000000000000000000000000000000000..b3bdb2955e974591c1a7afc5b20776261adeb193

mode 100644,000000..100644
--- 1/youtube_dl/extractor/appletrailers.py
--- /dev/null
+++ b/youtube_dl/extractor/appletrailers.py
@@@ -1,167 -1,0 +1,167 @@@
-             for formats_el in li.findall('.//li/a'):
+ +import re
+ +import xml.etree.ElementTree
+ +
+ +from .common import InfoExtractor
+ +from ..utils import (
+ +    determine_ext,
+ +    ExtractorError,
+ +)
+ +
+ +
+ +class AppleTrailersIE(InfoExtractor):
+ +    _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+ +    _TEST = {
+ +        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
+ +        u"playlist": [
+ +            {
+ +                u"file": u"manofsteel-trailer4.mov",
+ +                u"md5": u"11874af099d480cc09e103b189805d5f",
+ +                u"info_dict": {
+ +                    u"duration": 111,
+ +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
+ +                    u"title": u"Trailer 4",
+ +                    u"upload_date": u"20130523",
+ +                    u"uploader_id": u"wb",
+ +                },
+ +            },
+ +            {
+ +                u"file": u"manofsteel-trailer3.mov",
+ +                u"md5": u"07a0a262aae5afe68120eed61137ab34",
+ +                u"info_dict": {
+ +                    u"duration": 182,
+ +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
+ +                    u"title": u"Trailer 3",
+ +                    u"upload_date": u"20130417",
+ +                    u"uploader_id": u"wb",
+ +                },
+ +            },
+ +            {
+ +                u"file": u"manofsteel-trailer.mov",
+ +                u"md5": u"e401fde0813008e3307e54b6f384cff1",
+ +                u"info_dict": {
+ +                    u"duration": 148,
+ +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
+ +                    u"title": u"Trailer",
+ +                    u"upload_date": u"20121212",
+ +                    u"uploader_id": u"wb",
+ +                },
+ +            },
+ +            {
+ +                u"file": u"manofsteel-teaser.mov",
+ +                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
+ +                u"info_dict": {
+ +                    u"duration": 93,
+ +                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
+ +                    u"title": u"Teaser",
+ +                    u"upload_date": u"20120721",
+ +                    u"uploader_id": u"wb",
+ +                },
+ +            }
+ +        ]
+ +    }
+ +
+ +    def _real_extract(self, url):
+ +        mobj = re.match(self._VALID_URL, url)
+ +        movie = mobj.group('movie')
+ +        uploader_id = mobj.group('company')
+ +
+ +        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
+ +        playlist_snippet = self._download_webpage(playlist_url, movie)
+ +        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
+ +        playlist_html = u'<html>' + playlist_cleaned + u'</html>'
+ +
+ +        size_cache = {}
+ +
+ +        doc = xml.etree.ElementTree.fromstring(playlist_html)
+ +        playlist = []
+ +        for li in doc.findall('./div/ul/li'):
+ +            title = li.find('.//h3').text
+ +            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
+ +            thumbnail = li.find('.//img').attrib['src']
+ +
+ +            date_el = li.find('.//p')
+ +            upload_date = None
+ +            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
+ +            if m:
+ +                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
+ +            runtime_el = date_el.find('./br')
+ +            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
+ +            duration = None
+ +            if m:
+ +                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
+ +
+ +            formats = []
++            for formats_el in li.findall('.//a'):
+ +                if formats_el.attrib['class'] != 'OverlayPanel':
+ +                    continue
+ +                target = formats_el.attrib['target']
+ +
+ +                format_code = formats_el.text
+ +                if 'Automatic' in format_code:
+ +                    continue
+ +
+ +                size_q = formats_el.attrib['href']
+ +                size_id = size_q.rpartition('#videos-')[2]
+ +                if size_id not in size_cache:
+ +                    size_url = url + size_q
+ +                    sizepage_html = self._download_webpage(
+ +                        size_url, movie,
+ +                        note=u'Downloading size info %s' % size_id,
+ +                        errnote=u'Error while downloading size info %s' % size_id,
+ +                    )
+ +                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
+ +                    size_cache[size_id] = _doc
+ +
+ +                sizepage_doc = size_cache[size_id]
+ +                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
+ +                for vid_a in links:
+ +                    href = vid_a.get('href')
+ +                    if not href.endswith(target):
+ +                        continue
+ +                    detail_q = href.partition('#')[0]
+ +                    detail_url = url + '/' + detail_q
+ +
+ +                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
+ +                    detail_id = m.group('detail_id')
+ +
+ +                    detail_html = self._download_webpage(
+ +                        detail_url, movie,
+ +                        note=u'Downloading detail %s %s' % (detail_id, size_id),
+ +                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
+ +                    )
+ +                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
+ +                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
+ +                    assert movie_link_el.get('class') == 'movieLink'
+ +                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
+ +                    ext = determine_ext(movie_link)
+ +                    assert ext == 'mov'
+ +
+ +                    formats.append({
+ +                        'format': format_code,
+ +                        'ext': ext,
+ +                        'url': movie_link,
+ +                    })
+ +
+ +            info = {
+ +                '_type': 'video',
+ +                'id': video_id,
+ +                'title': title,
+ +                'formats': formats,
+ +                'title': title,
+ +                'duration': duration,
+ +                'thumbnail': thumbnail,
+ +                'upload_date': upload_date,
+ +                'uploader_id': uploader_id,
+ +                'user_agent': 'QuickTime compatible (youtube-dl)',
+ +            }
+ +            # TODO: Remove when #980 has been merged
+ +            info['url'] = formats[-1]['url']
+ +            info['ext'] = formats[-1]['ext']
+ +
+ +            playlist.append(info)
+ +
+ +        return {
+ +            '_type': 'playlist',
+ +            'id': movie,
+ +            'entries': playlist,
+ +        }
diff --combined youtube_dl/extractor/common.py

index 52c4483c9ecafe914270444b848ba8ac5966f802,da50abfc1cd492b8d360ef601b44841a938c055b..12169b2bb9209dd901d280e02ce10349e1cad6b4
--- 1/youtube_dl/extractor/common.py
--- 2/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@@ -47,8 -47,7 +47,8 @@@ class InfoExtractor(object)
       uploader_id:    Nickname or id of the video uploader.
       location:       Physical location of the video.
       player_url:     SWF Player URL (used for rtmpdump).
- -    subtitles:      The subtitle file contents.
+ +    subtitles:      The subtitle file contents as a dictionary in the format
+ +                    {language: subtitles}.
       view_count:     How many users have watched the video on the platform.
       urlhandle:      [internal] The urlHandle to be used to download the file,
                       like returned by urllib.request.urlopen
@@@ -78,13 -77,7 +78,13 @@@
       @classmethod
       def suitable(cls, url):
           """Receives a URL and returns True if suitable for this IE."""
- -        return re.match(cls._VALID_URL, url) is not None
+ +
+ +        # This does not use has/getattr intentionally - we want to know whether
+ +        # we have cached the regexp for *this* class, whereas getattr would also
+ +        # match the superclass
+ +        if '_VALID_URL_RE' not in cls.__dict__:
+ +            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+ +        return cls._VALID_URL_RE.match(url) is not None
   
       @classmethod
       def working(cls):
@@@ -129,7 -122,7 +129,7 @@@
           except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
               if errnote is None:
                   errnote = u'Unable to download webpage'
--            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
++            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
   
       def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
           """ Returns a tuple (page content as string, URL handle) """
diff --combined youtube_dl/utils.py

index 52cfb8a6d18e69be0db213bbf3f4da63f1fcd7d9,59eeaf4a89084783e1ca2607840b3b7dfc4670f5..f78b5fe7889e6493ed8d87046dd7294fb3e3809f
--- 1/youtube_dl/utils.py
--- 2/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@@ -60,6 -60,6 +60,11 @@@ try
   except ImportError: # Python 2
       import httplib as compat_http_client
   
++try:
++    from urllib.error import HTTPError as compat_HTTPError  # Python 3: HTTPError is defined in urllib.error
++except ImportError:  # Python 2
++    from urllib2 import HTTPError as compat_HTTPError
++
   try:
       from subprocess import DEVNULL
       compat_subprocess_get_DEVNULL = lambda: DEVNULL
@@@ -489,7 -489,7 +494,7 @@@ def make_HTTPS_handler(opts)
   
   class ExtractorError(Exception):
       """Error during info extraction."""
--    def __init__(self, msg, tb=None, expected=False):
++    def __init__(self, msg, tb=None, expected=False, cause=None):
           """ tb, if given, is the original traceback (so that it can be printed out).
           If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
           """
@@@ -497,11 -497,11 +502,12 @@@
           if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
               expected = True
           if not expected:
- -            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+ +            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
           super(ExtractorError, self).__init__(msg)
   
           self.traceback = tb
           self.exc_info = sys.exc_info()  # preserve original exception
++        self.cause = cause
   
       def format_traceback(self):
           if self.traceback is None:
@@@ -657,9 -657,6 +663,9 @@@ def determine_ext(url, default_ext=u'un
       else:
           return default_ext
   
+ +def subtitles_filename(filename, sub_lang, sub_format):
+ +    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+ +
   def date_from_str(date_str):
       """
       Return a datetime object from a string in the format YYYYMMDD or
author	Philipp Hagemeister <phihag@phihag.de>
	Wed, 28 Aug 2013 02:25:38 +0000 (04:25 +0200)
committer	Philipp Hagemeister <phihag@phihag.de>
	Wed, 28 Aug 2013 02:25:38 +0000 (04:25 +0200)
		1	2
youtube_dl/extractor/__init__.py	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/extractor/appletrailers.py	patch \|	diff1 \|	\|	blob \| history
youtube_dl/extractor/common.py	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/utils.py	patch \|	diff1 \|	diff2 \|	blob \| history