Merge remote-tracking branch 'Tithen-Firion/master'

author Philipp Hagemeister <phihag@phihag.de>
Sat, 10 Jan 2015 01:26:21 +0000 (02:26 +0100)

committer Philipp Hagemeister <phihag@phihag.de>
Sat, 10 Jan 2015 01:26:21 +0000 (02:26 +0100)

diff --combined youtube_dl/extractor/__init__.py

index f544e87f1222eae4afd300309ed1f342eae78aa3,b09ee303d43344de728b7c6b7c3a5772f5a18b1b..a8579d0833a4f7aec9ee43ba322659a18ed216c9
--- 1/youtube_dl/extractor/__init__.py
--- 2/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@@ -3,11 -3,8 +3,11 @@@ from __future__ import unicode_literal
   from .abc import ABCIE
   from .academicearth import AcademicEarthCourseIE
   from .addanime import AddAnimeIE
+ +from .adobetv import AdobeTVIE
   from .adultswim import AdultSwimIE
   from .aftonbladet import AftonbladetIE
+ +from .aljazeera import AlJazeeraIE
+ +from .alphaporno import AlphaPornoIE
   from .anitube import AnitubeIE
   from .anysex import AnySexIE
   from .aol import AolIE
@@@ -25,16 -22,13 +25,16 @@@ from .arte import 
       ArteTVDDCIE,
       ArteTVEmbedIE,
   )
- -from .audiomack import AudiomackIE
+ +from .atresplayer import AtresPlayerIE
+ +from .audiomack import AudiomackIE, AudiomackAlbumIE
   from .auengine import AUEngineIE
+ +from .azubu import AzubuIE
   from .bambuser import BambuserIE, BambuserChannelIE
   from .bandcamp import BandcampIE, BandcampAlbumIE
   from .bbccouk import BBCCoUkIE
   from .beeg import BeegIE
   from .behindkink import BehindKinkIE
+ +from .bet import BetIE
   from .bild import BildIE
   from .bilibili import BiliBiliIE
   from .blinkx import BlinkxIE
@@@ -55,7 -49,7 +55,7 @@@ from .cbsnews import CBSNewsI
   from .ceskatelevize import CeskaTelevizeIE
   from .channel9 import Channel9IE
   from .chilloutzone import ChilloutzoneIE
- -from .cinemassacre import CinemassacreIE
+ +from .cinchcast import CinchcastIE
   from .clipfish import ClipfishIE
   from .cliphunter import CliphunterIE
   from .clipsyndicate import ClipsyndicateIE
@@@ -66,12 -60,9 +66,12 @@@ from .cnet import CNETI
   from .cnn import (
       CNNIE,
       CNNBlogsIE,
+ +    CNNArticleIE,
   )
   from .collegehumor import CollegeHumorIE
   from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
+ +from .comcarcoff import ComCarCoffIE
+ +from .commonmistakes import CommonMistakesIE
   from .condenast import CondeNastIE
   from .cracked import CrackedIE
   from .criterion import CriterionIE
@@@ -93,14 -84,12 +93,14 @@@ from .dotsub import DotsubI
   from .dreisat import DreiSatIE
   from .drtuber import DrTuberIE
   from .drtv import DRTVIE
+ +from .dvtv import DVTVIE
   from .dump import DumpIE
   from .defense import DefenseGouvFrIE
   from .discovery import DiscoveryIE
   from .divxstage import DivxStageIE
   from .dropbox import DropboxIE
   from .ebaumsworld import EbaumsWorldIE
+ +from .echomsk import EchoMskIE
   from .ehow import EHowIE
   from .eighttracks import EightTracksIE
   from .einthusan import EinthusanIE
@@@ -113,7 -102,6 +113,7 @@@ from .elpais import ElPaisI
   from .empflix import EMPFlixIE
   from .engadget import EngadgetIE
   from .eporner import EpornerIE
+ +from .eroprofile import EroProfileIE
   from .escapist import EscapistIE
   from .everyonesmixtape import EveryonesMixtapeIE
   from .exfm import ExfmIE
@@@ -133,8 -121,6 +133,8 @@@ from .fktv import 
   from .flickr import FlickrIE
   from .folketinget import FolketingetIE
   from .fourtube import FourTubeIE
+ +from .foxgay import FoxgayIE
+ +from .foxnews import FoxNewsIE
   from .franceculture import FranceCultureIE
   from .franceinter import FranceInterIE
   from .francetv import (
@@@ -158,8 -144,6 +158,8 @@@ from .gamestar import GameStarI
   from .gametrailers import GametrailersIE
   from .gdcvault import GDCVaultIE
   from .generic import GenericIE
+ +from .giantbomb import GiantBombIE
+ +from .giga import GigaIE
   from .glide import GlideIE
   from .globo import GloboIE
   from .godtube import GodTubeIE
@@@ -170,13 -154,10 +170,13 @@@ from .googlesearch import GoogleSearchI
   from .gorillavid import GorillaVidIE
   from .goshgay import GoshgayIE
   from .grooveshark import GroovesharkIE
+ +from .groupon import GrouponIE
   from .hark import HarkIE
   from .heise import HeiseIE
+ +from .hellporno import HellPornoIE
   from .helsinki import HelsinkiIE
   from .hentaistigma import HentaiStigmaIE
+ +from .hitbox import HitboxIE, HitboxLiveIE
   from .hornbunny import HornBunnyIE
   from .hostingbulk import HostingBulkIE
   from .hotnewhiphop import HotNewHipHopIE
@@@ -235,7 -216,6 +235,7 @@@ from .mdr import MDRI
   from .metacafe import MetacafeIE
   from .metacritic import MetacriticIE
   from .mgoon import MgoonIE
+ +from .minhateca import MinhatecaIE
   from .ministrygrid import MinistryGridIE
   from .mit import TechTVMITIE, MITIE, OCWMITIE
   from .mitele import MiTeleIE
@@@ -265,7 -245,6 +265,7 @@@ from .muzu import MuzuTVI
   from .myspace import MySpaceIE, MySpaceAlbumIE
   from .myspass import MySpassIE
   from .myvideo import MyVideoIE
+ +from .myvidster import MyVidsterIE
   from .naver import NaverIE
   from .nba import NBAIE
   from .nbc import (
@@@ -274,8 -253,6 +274,8 @@@
   )
   from .ndr import NDRIE
   from .ndtv import NDTVIE
+ +from .netzkino import NetzkinoIE
+ +from .nerdcubed import NerdCubedFeedIE
   from .newgrounds import NewgroundsIE
   from .newstube import NewstubeIE
   from .nfb import NFBIE
@@@ -302,7 -279,6 +302,7 @@@ from .nytimes import NYTimesI
   from .nuvid import NuvidIE
   from .oktoberfesttv import OktoberfestTVIE
   from .ooyala import OoyalaIE
+ +from .openfilm import OpenFilmIE
   from .orf import (
       ORFTVthekIE,
       ORFOE1IE,
@@@ -326,13 -302,10 +326,13 @@@ from .promptfile import PromptFileI
   from .prosiebensat1 import ProSiebenSat1IE
   from .pyvideo import PyvideoIE
   from .quickvid import QuickVidIE
+ +from .radiode import RadioDeIE
+ +from .radiobremen import RadioBremenIE
   from .radiofrance import RadioFranceIE
   from .rai import RaiIE
   from .rbmaradio import RBMARadioIE
   from .redtube import RedTubeIE
+ +from .restudy import RestudyIE
   from .reverbnation import ReverbNationIE
   from .ringtv import RingTVIE
   from .ro220 import Ro220IE
@@@ -341,14 -314,12 +341,14 @@@ from .roxwel import RoxwelI
   from .rtbf import RTBFIE
   from .rtlnl import RtlXlIE
   from .rtlnow import RTLnowIE
+ +from .rtp import RTPIE
   from .rts import RTSIE
   from .rtve import RTVEALaCartaIE, RTVELiveIE
   from .ruhd import RUHDIE
   from .rutube import (
       RutubeIE,
       RutubeChannelIE,
+ +    RutubeEmbedIE,
       RutubeMovieIE,
       RutubePersonIE,
   )
@@@ -358,8 -329,6 +358,8 @@@ from .savefrom import SaveFromI
   from .sbs import SBSIE
   from .scivee import SciVeeIE
   from .screencast import ScreencastIE
+ +from .screencastomatic import ScreencastOMaticIE
+ +from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
   from .servingsys import ServingSysIE
   from .sexu import SexuIE
   from .sexykarma import SexyKarmaIE
@@@ -419,7 -388,6 +419,7 @@@ from .ted import TEDI
   from .telebruxelles import TeleBruxellesIE
   from .telecinco import TelecincoIE
   from .telemb import TeleMBIE
+ +from .teletask import TeleTaskIE
   from .tenplay import TenPlayIE
   from .testurl import TestURLIE
   from .tf1 import TF1IE
@@@ -447,7 -415,7 +447,7 @@@ from .tunein import TuneInI
   from .turbo import TurboIE
   from .tutv import TutvIE
   from .tvigle import TvigleIE
- from .tvp import TvpIE
+ from .tvp import TvpIE, TvpSeriesIE
   from .tvplay import TVPlayIE
   from .twentyfourvideo import TwentyFourVideoIE
   from .twitch import TwitchIE
@@@ -478,7 -446,6 +478,7 @@@ from .videott import VideoTtI
   from .videoweed import VideoWeedIE
   from .vidme import VidmeIE
   from .vidzi import VidziIE
+ +from .vier import VierIE, VierVideosIE
   from .vimeo import (
       VimeoIE,
       VimeoAlbumIE,
@@@ -514,7 -481,6 +514,7 @@@ from .wdr import 
       WDRMobileIE,
       WDRMausIE,
   )
+ +from .webofstories import WebOfStoriesIE
   from .weibo import WeiboIE
   from .wimp import WimpIE
   from .wistia import WistiaIE
@@@ -527,12 -493,10 +527,12 @@@ from .xminus import XMinusI
   from .xnxx import XNXXIE
   from .xvideos import XVideosIE
   from .xtube import XTubeUserIE, XTubeIE
+ +from .xxxymovies import XXXYMoviesIE
   from .yahoo import (
       YahooIE,
       YahooSearchIE,
   )
+ +from .yesjapan import YesJapanIE
   from .ynet import YnetIE
   from .youjizz import YouJizzIE
   from .youku import YoukuIE
@@@ -550,12 -514,12 +550,12 @@@ from .youtube import 
       YoutubeSearchURLIE,
       YoutubeShowIE,
       YoutubeSubscriptionsIE,
- -    YoutubeTopListIE,
+ +    YoutubeTruncatedIDIE,
       YoutubeTruncatedURLIE,
       YoutubeUserIE,
       YoutubeWatchLaterIE,
   )
- -from .zdf import ZDFIE
+ +from .zdf import ZDFIE, ZDFChannelIE
   from .zingmp3 import (
       ZingMp3SongIE,
       ZingMp3AlbumIE,
@@@ -576,17 -540,6 +576,17 @@@ def gen_extractors()
       return [klass() for klass in _ALL_CLASSES]
   
   
+ +def list_extractors(age_limit):
+ +    """
+ +    Return a list of extractors that are suitable for the given age,
+ +    sorted by extractor ID.
+ +    """
+ +
+ +    return sorted(
+ +        filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
+ +        key=lambda ie: ie.IE_NAME.lower())
+ +
+ +
   def get_info_extractor(ie_name):
       """Returns the info extractor class with the given ie_name"""
       return globals()[ie_name + 'IE']
diff --combined youtube_dl/extractor/common.py

index b4cd59e4318a52019e060250499b1d50d1e01a8b,b633ea9b92a03be0064ee096cc5fdbe18997e8e7..cd155a0901b6a50189d064da26b74951a41b1e18
--- 1/youtube_dl/extractor/common.py
--- 2/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@@ -21,7 -21,6 +21,7 @@@ from ..compat import 
       compat_str,
   )
   from ..utils import (
+ +    age_restricted,
       clean_html,
       compiled_regex_type,
       ExtractorError,
@@@ -41,7 -40,7 +41,7 @@@ class InfoExtractor(object)
       information about the video (or videos) the URL refers to. This
       information includes the real video URL, the video title, author and
       others. The information is stored in a dictionary which is then
- -    passed to the FileDownloader. The FileDownloader processes this
+ +    passed to the YoutubeDL. The YoutubeDL processes this
       information possibly downloading the video to the file system, among
       other possible outcomes.
   
@@@ -93,8 -92,6 +93,8 @@@
                                    by this field, regardless of all other values.
                                    -1 for default (order by other properties),
                                    -2 or smaller for less than default.
+ +                                 < -1000 to hide the format (if there is
+ +                                    another one which is strictly better)
                       * language_preference  Is this in the correct requested
                                    language?
                                    10 if it's what the URL is about,
@@@ -121,7 -118,6 +121,7 @@@
   
       The following fields are optional:
   
+ +    alt_title:      A secondary title of the video.
       display_id      An alternative identifier for the video, not necessarily
                       unique, but available before title. Typically, id is
                       something like "4234987", title "Dancing naked mole rats",
@@@ -133,7 -129,7 +133,7 @@@
                           * "resolution" (optional, string "{width}x{height"},
                                           deprecated)
       thumbnail:      Full URL to a video thumbnail image.
- -    description:    One-line video description.
+ +    description:    Full video description.
       uploader:       Full name of the video uploader.
       timestamp:      UNIX timestamp of the moment the video became available.
       upload_date:    Video upload date (YYYYMMDD).
@@@ -147,17 -143,6 +147,17 @@@
       like_count:     Number of positive ratings of the video
       dislike_count:  Number of negative ratings of the video
       comment_count:  Number of comments on the video
+ +    comments:       A list of comments, each with one or more of the following
+ +                    properties (all but one of text or html optional):
+ +                        * "author" - human-readable name of the comment author
+ +                        * "author_id" - user ID of the comment author
+ +                        * "id" - Comment ID
+ +                        * "html" - Comment as HTML
+ +                        * "text" - Plain text of the comment
+ +                        * "timestamp" - UNIX timestamp of comment
+ +                        * "parent" - ID of the comment this one is replying to.
+ +                                     Set to "root" to indicate that this is a
+ +                                     comment to the original video.
       age_limit:      Age restriction for the video, as an integer (years)
       webpage_url:    The url to the video webpage, if given to youtube-dl it
                       should allow to get the same result again. (It will be set
@@@ -173,8 -158,8 +173,8 @@@
   
   
       _type "playlist" indicates multiple videos.
- -    There must be a key "entries", which is a list or a PagedList object, each
- -    element of which is a valid dictionary under this specfication.
+ +    There must be a key "entries", which is a list, an iterable, or a PagedList
+ +    object, each element of which is a valid dictionary by this specification.
   
       Additionally, playlists can have "title" and "id" attributes with the same
       semantics as videos (see above).
@@@ -189,10 -174,9 +189,10 @@@
       _type "url" indicates that the video must be extracted from another
       location, possibly by a different extractor. Its only required key is:
       "url" - the next URL to extract.
- -
- -    Additionally, it may have properties believed to be identical to the
- -    resolved entity, for example "title" if the title of the referred video is
+ +    The key "ie_key" can be set to the class name (minus the trailing "IE",
+ +    e.g. "Youtube") if the extractor class is known in advance.
+ +    Additionally, the dictionary may have any properties of the resolved entity
+ +    known in advance, for example "title" if the title of the referred video is
       known ahead of time.
   
   
@@@ -376,9 -360,19 +376,19 @@@
   
           return content
   
-     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
           """ Returns the data of the page as a string """
-         res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+         success = False
+         try_count = 0
+         while success is False:
+             try:
+                 res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+                 success = True
+             except compat_http_client.IncompleteRead as e:
+                 try_count += 1
+                 if try_count >= tries:
+                     raise e
+                 self._sleep(timeout, video_id)
           if res is False:
               return res
           else:
@@@ -406,10 -400,6 +416,10 @@@
               url_or_request, video_id, note, errnote, fatal=fatal)
           if (not fatal) and json_string is False:
               return None
+ +        return self._parse_json(
+ +            json_string, video_id, transform_source=transform_source, fatal=fatal)
+ +
+ +    def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
           if transform_source:
               json_string = transform_source(json_string)
           try:
@@@ -459,7 -449,7 +469,7 @@@
           return video_info
   
       @staticmethod
- -    def playlist_result(entries, playlist_id=None, playlist_title=None):
+ +    def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
           """Returns a playlist"""
           video_info = {'_type': 'playlist',
                         'entries': entries}
@@@ -467,8 -457,6 +477,8 @@@
               video_info['id'] = playlist_id
           if playlist_title:
               video_info['title'] = playlist_title
+ +        if playlist_description:
+ +            video_info['description'] = playlist_description
           return video_info
   
       def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
@@@ -603,9 -591,9 +613,9 @@@
           if display_name is None:
               display_name = name
           return self._html_search_regex(
- -            r'''(?ix)<meta
+ +            r'''(?isx)<meta
                       (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
- -                    [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+ +                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
               html, display_name, fatal=fatal, group='content', **kwargs)
   
       def _dc_search_uploader(self, html):
@@@ -812,49 -800,6 +822,49 @@@
           self._sort_formats(formats)
           return formats
   
+ +    # TODO: improve extraction
+ +    def _extract_smil_formats(self, smil_url, video_id):
+ +        smil = self._download_xml(
+ +            smil_url, video_id, 'Downloading SMIL file',
+ +            'Unable to download SMIL file')
+ +
+ +        base = smil.find('./head/meta').get('base')
+ +
+ +        formats = []
+ +        rtmp_count = 0
+ +        for video in smil.findall('./body/switch/video'):
+ +            src = video.get('src')
+ +            if not src:
+ +                continue
+ +            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+ +            width = int_or_none(video.get('width'))
+ +            height = int_or_none(video.get('height'))
+ +            proto = video.get('proto')
+ +            if not proto:
+ +                if base:
+ +                    if base.startswith('rtmp'):
+ +                        proto = 'rtmp'
+ +                    elif base.startswith('http'):
+ +                        proto = 'http'
+ +            ext = video.get('ext')
+ +            if proto == 'm3u8':
+ +                formats.extend(self._extract_m3u8_formats(src, video_id, ext))
+ +            elif proto == 'rtmp':
+ +                rtmp_count += 1
+ +                streamer = video.get('streamer') or base
+ +                formats.append({
+ +                    'url': streamer,
+ +                    'play_path': src,
+ +                    'ext': 'flv',
+ +                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+ +                    'tbr': bitrate,
+ +                    'width': width,
+ +                    'height': height,
+ +                })
+ +        self._sort_formats(formats)
+ +
+ +        return formats
+ +
       def _live_title(self, name):
           """ Generate the title for a live video """
           now = datetime.datetime.now()
@@@ -884,40 -829,10 +894,40 @@@
           return res
   
       def _set_cookie(self, domain, name, value, expire_time=None):
- -        cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None,
+ +        cookie = compat_cookiejar.Cookie(
+ +            0, name, value, None, None, domain, None,
               None, '/', True, False, expire_time, '', None, None, None)
           self._downloader.cookiejar.set_cookie(cookie)
   
+ +    def get_testcases(self, include_onlymatching=False):
+ +        t = getattr(self, '_TEST', None)
+ +        if t:
+ +            assert not hasattr(self, '_TESTS'), \
+ +                '%s has _TEST and _TESTS' % type(self).__name__
+ +            tests = [t]
+ +        else:
+ +            tests = getattr(self, '_TESTS', [])
+ +        for t in tests:
+ +            if not include_onlymatching and t.get('only_matching', False):
+ +                continue
+ +            t['name'] = type(self).__name__[:-len('IE')]
+ +            yield t
+ +
+ +    def is_suitable(self, age_limit):
+ +        """ Test whether the extractor is generally suitable for the given
+ +        age limit (i.e. pornographic sites are not, all others usually are) """
+ +
+ +        any_restricted = False
+ +        for tc in self.get_testcases(include_onlymatching=False):
+ +            if 'playlist' in tc:
+ +                tc = tc['playlist'][0]
+ +            is_restricted = age_restricted(
+ +                tc.get('info_dict', {}).get('age_limit'), age_limit)
+ +            if not is_restricted:
+ +                return True
+ +            any_restricted = any_restricted or is_restricted
+ +        return not any_restricted
+ +
   
   class SearchInfoExtractor(InfoExtractor):
       """
author     Philipp Hagemeister <phihag@phihag.de>
           Sat, 10 Jan 2015 01:26:21 +0000 (02:26 +0100)
committer  Philipp Hagemeister <phihag@phihag.de>
           Sat, 10 Jan 2015 01:26:21 +0000 (02:26 +0100)

Files changed (combined diff against parents 1 and 2):
youtube_dl/extractor/__init__.py    patch | diff1 | diff2 | blob | history
youtube_dl/extractor/common.py      patch | diff1 | diff2 | blob | history