Merge remote-tracking branch 'Dineshs91/f4m-2.0'

author Philipp Hagemeister <phihag@phihag.de>

Sat, 10 Jan 2015 16:51:52 +0000 (17:51 +0100)

committer Philipp Hagemeister <phihag@phihag.de>

Sat, 10 Jan 2015 16:51:52 +0000 (17:51 +0100)
author Philipp Hagemeister <phihag@phihag.de>
Sat, 10 Jan 2015 16:51:52 +0000 (17:51 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Sat, 10 Jan 2015 16:51:52 +0000 (17:51 +0100)
diff --combined youtube_dl/extractor/__init__.py

index b908eb38ce11906e9cd5ff7e4638212622aefe8c,7918f9ab7255de9de714040ca55e2625715aa2b4..fc83a7d07faf9d60dda75e268eef6f34fd5e4103
--- 1/youtube_dl/extractor/__init__.py
--- 2/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@@ -1,13 -1,8 +1,13 @@@
+ +from __future__ import unicode_literals
+ +
   from .abc import ABCIE
   from .academicearth import AcademicEarthCourseIE
   from .addanime import AddAnimeIE
+ +from .adobetv import AdobeTVIE
   from .adultswim import AdultSwimIE
   from .aftonbladet import AftonbladetIE
+ +from .aljazeera import AlJazeeraIE
+ +from .alphaporno import AlphaPornoIE
   from .anitube import AnitubeIE
   from .anysex import AnySexIE
   from .aol import AolIE
@@@ -25,27 -20,19 +25,27 @@@ from .arte import 
       ArteTVDDCIE,
       ArteTVEmbedIE,
   )
+ +from .atresplayer import AtresPlayerIE
+ +from .atttechchannel import ATTTechChannelIE
+ +from .audiomack import AudiomackIE, AudiomackAlbumIE
   from .auengine import AUEngineIE
+ +from .azubu import AzubuIE
   from .bambuser import BambuserIE, BambuserChannelIE
   from .bandcamp import BandcampIE, BandcampAlbumIE
   from .bbccouk import BBCCoUkIE
   from .beeg import BeegIE
   from .behindkink import BehindKinkIE
+ +from .bet import BetIE
+ +from .bild import BildIE
   from .bilibili import BiliBiliIE
   from .blinkx import BlinkxIE
   from .bliptv import BlipTVIE, BlipTVUserIE
   from .bloomberg import BloombergIE
+ +from .bpb import BpbIE
   from .br import BRIE
   from .breakcom import BreakIE
   from .brightcove import BrightcoveIE
+ +from .buzzfeed import BuzzFeedIE
   from .byutv import BYUtvIE
   from .c56 import C56IE
   from .canal13cl import Canal13clIE
@@@ -56,7 -43,7 +56,7 @@@ from .cbsnews import CBSNewsI
   from .ceskatelevize import CeskaTelevizeIE
   from .channel9 import Channel9IE
   from .chilloutzone import ChilloutzoneIE
- -from .cinemassacre import CinemassacreIE
+ +from .cinchcast import CinchcastIE
   from .clipfish import ClipfishIE
   from .cliphunter import CliphunterIE
   from .clipsyndicate import ClipsyndicateIE
@@@ -67,20 -54,15 +67,20 @@@ from .cnet import CNETI
   from .cnn import (
       CNNIE,
       CNNBlogsIE,
+ +    CNNArticleIE,
   )
   from .collegehumor import CollegeHumorIE
   from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
+ +from .comcarcoff import ComCarCoffIE
+ +from .commonmistakes import CommonMistakesIE
   from .condenast import CondeNastIE
   from .cracked import CrackedIE
   from .criterion import CriterionIE
- -from .crunchyroll import CrunchyrollIE
+ +from .crunchyroll import (
+ +    CrunchyrollIE,
+ +    CrunchyrollShowPlaylistIE
+ +)
   from .cspan import CSpanIE
- -from .d8 import D8IE
   from .dailymotion import (
       DailymotionIE,
       DailymotionPlaylistIE,
@@@ -94,14 -76,12 +94,14 @@@ from .dotsub import DotsubI
   from .dreisat import DreiSatIE
   from .drtuber import DrTuberIE
   from .drtv import DRTVIE
+ +from .dvtv import DVTVIE
   from .dump import DumpIE
   from .defense import DefenseGouvFrIE
   from .discovery import DiscoveryIE
   from .divxstage import DivxStageIE
   from .dropbox import DropboxIE
   from .ebaumsworld import EbaumsWorldIE
+ +from .echomsk import EchoMskIE
   from .ehow import EHowIE
   from .eighttracks import EightTracksIE
   from .einthusan import EinthusanIE
@@@ -114,7 -94,6 +114,7 @@@ from .elpais import ElPaisI
   from .empflix import EMPFlixIE
   from .engadget import EngadgetIE
   from .eporner import EpornerIE
+ +from .eroprofile import EroProfileIE
   from .escapist import EscapistIE
   from .everyonesmixtape import EveryonesMixtapeIE
   from .exfm import ExfmIE
@@@ -132,10 -111,7 +132,10 @@@ from .fktv import 
       FKTVPosteckeIE,
   )
   from .flickr import FlickrIE
+ +from .folketinget import FolketingetIE
   from .fourtube import FourTubeIE
+ +from .foxgay import FoxgayIE
+ +from .foxnews import FoxNewsIE
   from .franceculture import FranceCultureIE
   from .franceinter import FranceInterIE
   from .francetv import (
@@@ -147,7 -123,6 +147,7 @@@
   )
   from .freesound import FreesoundIE
   from .freespeech import FreespeechIE
+ +from .freevideo import FreeVideoIE
   from .funnyordie import FunnyOrDieIE
   from .gamekings import GamekingsIE
   from .gameone import (
@@@ -159,25 -134,18 +159,25 @@@ from .gamestar import GameStarI
   from .gametrailers import GametrailersIE
   from .gdcvault import GDCVaultIE
   from .generic import GenericIE
+ +from .giantbomb import GiantBombIE
+ +from .giga import GigaIE
+ +from .glide import GlideIE
   from .globo import GloboIE
   from .godtube import GodTubeIE
+ +from .goldenmoustache import GoldenMoustacheIE
   from .golem import GolemIE
   from .googleplus import GooglePlusIE
   from .googlesearch import GoogleSearchIE
   from .gorillavid import GorillaVidIE
   from .goshgay import GoshgayIE
   from .grooveshark import GroovesharkIE
+ +from .groupon import GrouponIE
   from .hark import HarkIE
   from .heise import HeiseIE
+ +from .hellporno import HellPornoIE
   from .helsinki import HelsinkiIE
   from .hentaistigma import HentaiStigmaIE
+ +from .hitbox import HitboxIE, HitboxLiveIE
   from .hornbunny import HornBunnyIE
   from .hostingbulk import HostingBulkIE
   from .hotnewhiphop import HotNewHipHopIE
@@@ -215,7 -183,6 +215,7 @@@ from .kontrtube import KontrTubeI
   from .krasview import KrasViewIE
   from .ku6 import Ku6IE
   from .la7 import LA7IE
+ +from .laola1tv import Laola1TvIE
   from .lifenews import LifeNewsIE
   from .liveleak import LiveLeakIE
   from .livestream import (
@@@ -236,7 -203,6 +236,7 @@@ from .mdr import MDRI
   from .metacafe import MetacafeIE
   from .metacritic import MetacriticIE
   from .mgoon import MgoonIE
+ +from .minhateca import MinhatecaIE
   from .ministrygrid import MinistryGridIE
   from .mit import TechTVMITIE, MITIE, OCWMITIE
   from .mitele import MiTeleIE
@@@ -263,10 -229,9 +263,10 @@@ from .muenchentv import MuenchenTVI
   from .musicplayon import MusicPlayOnIE
   from .musicvault import MusicVaultIE
   from .muzu import MuzuTVIE
- -from .myspace import MySpaceIE
+ +from .myspace import MySpaceIE, MySpaceAlbumIE
   from .myspass import MySpassIE
   from .myvideo import MyVideoIE
+ +from .myvidster import MyVidsterIE
   from .naver import NaverIE
   from .nba import NBAIE
   from .nbc import (
@@@ -275,14 -240,12 +275,14 @@@
   )
   from .ndr import NDRIE
   from .ndtv import NDTVIE
+ +from .netzkino import NetzkinoIE
+ +from .nerdcubed import NerdCubedFeedIE
   from .newgrounds import NewgroundsIE
   from .newstube import NewstubeIE
   from .nfb import NFBIE
   from .nfl import NFLIE
   from .nhl import NHLIE, NHLVideocenterIE
- -from .niconico import NiconicoIE
+ +from .niconico import NiconicoIE, NiconicoPlaylistIE
   from .ninegag import NineGagIE
   from .noco import NocoIE
   from .normalboots import NormalbootsIE
@@@ -303,7 -266,6 +303,7 @@@ from .nytimes import NYTimesI
   from .nuvid import NuvidIE
   from .oktoberfesttv import OktoberfestTVIE
   from .ooyala import OoyalaIE
+ +from .openfilm import OpenFilmIE
   from .orf import (
       ORFTVthekIE,
       ORFOE1IE,
@@@ -312,7 -274,6 +312,7 @@@
   from .parliamentliveuk import ParliamentLiveUKIE
   from .patreon import PatreonIE
   from .pbs import PBSIE
+ +from .phoenix import PhoenixIE
   from .photobucket import PhotobucketIE
   from .planetaplay import PlanetaPlayIE
   from .played import PlayedIE
@@@ -326,30 -287,25 +326,31 @@@ from .pornoxo import PornoXOI
   from .promptfile import PromptFileIE
   from .prosiebensat1 import ProSiebenSat1IE
   from .pyvideo import PyvideoIE
+ +from .quickvid import QuickVidIE
+ +from .radiode import RadioDeIE
+ +from .radiobremen import RadioBremenIE
   from .radiofrance import RadioFranceIE
   from .rai import RaiIE
   from .rbmaradio import RBMARadioIE
   from .redtube import RedTubeIE
+ +from .restudy import RestudyIE
   from .reverbnation import ReverbNationIE
   from .ringtv import RingTVIE
   from .ro220 import Ro220IE
   from .rottentomatoes import RottenTomatoesIE
   from .roxwel import RoxwelIE
   from .rtbf import RTBFIE
+ from .rte import RteIE
   from .rtlnl import RtlXlIE
   from .rtlnow import RTLnowIE
+ +from .rtp import RTPIE
   from .rts import RTSIE
   from .rtve import RTVEALaCartaIE, RTVELiveIE
   from .ruhd import RUHDIE
   from .rutube import (
       RutubeIE,
       RutubeChannelIE,
+ +    RutubeEmbedIE,
       RutubeMovieIE,
       RutubePersonIE,
   )
@@@ -359,10 -315,7 +360,10 @@@ from .savefrom import SaveFromI
   from .sbs import SBSIE
   from .scivee import SciVeeIE
   from .screencast import ScreencastIE
+ +from .screencastomatic import ScreencastOMaticIE
+ +from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
   from .servingsys import ServingSysIE
+ +from .sexu import SexuIE
   from .sexykarma import SexyKarmaIE
   from .shared import SharedIE
   from .sharesix import ShareSixIE
@@@ -397,7 -350,6 +398,7 @@@ from .spike import SpikeI
   from .sport5 import Sport5IE
   from .sportbox import SportBoxIE
   from .sportdeutschland import SportDeutschlandIE
+ +from .srmediathek import SRMediathekIE
   from .stanfordoc import StanfordOpenClassroomIE
   from .steam import SteamIE
   from .streamcloud import StreamcloudIE
@@@ -408,7 -360,6 +409,7 @@@ from .syfy import SyfyI
   from .sztvhu import SztvHuIE
   from .tagesschau import TagesschauIE
   from .tapely import TapelyIE
+ +from .tass import TassIE
   from .teachertube import (
       TeacherTubeIE,
       TeacherTubeUserIE,
@@@ -417,10 -368,8 +418,10 @@@ from .teachingchannel import TeachingCh
   from .teamcoco import TeamcocoIE
   from .techtalks import TechTalksIE
   from .ted import TEDIE
+ +from .telebruxelles import TeleBruxellesIE
   from .telecinco import TelecincoIE
   from .telemb import TeleMBIE
+ +from .teletask import TeleTaskIE
   from .tenplay import TenPlayIE
   from .testurl import TestURLIE
   from .tf1 import TF1IE
@@@ -430,7 -379,6 +431,7 @@@ from .thesixtyone import TheSixtyOneI
   from .thisav import ThisAVIE
   from .tinypic import TinyPicIE
   from .tlc import TlcIE, TlcDeIE
+ +from .tmz import TMZIE
   from .tnaflix import TNAFlixIE
   from .thvideo import (
       THVideoIE,
@@@ -444,13 -392,11 +445,13 @@@ from .trutube import TruTubeI
   from .tube8 import Tube8IE
   from .tudou import TudouIE
   from .tumblr import TumblrIE
+ +from .tunein import TuneInIE
   from .turbo import TurboIE
   from .tutv import TutvIE
   from .tvigle import TvigleIE
- -from .tvp import TvpIE
+ +from .tvp import TvpIE, TvpSeriesIE
   from .tvplay import TVPlayIE
+ +from .twentyfourvideo import TwentyFourVideoIE
   from .twitch import TwitchIE
   from .ubu import UbuIE
   from .udemy import (
@@@ -467,7 -413,6 +468,7 @@@ from .vesti import VestiI
   from .vevo import VevoIE
   from .vgtv import VGTVIE
   from .vh1 import VH1IE
+ +from .vice import ViceIE
   from .viddler import ViddlerIE
   from .videobam import VideoBamIE
   from .videodetective import VideoDetectiveIE
@@@ -478,8 -423,6 +479,8 @@@ from .videopremium import VideoPremiumI
   from .videott import VideoTtIE
   from .videoweed import VideoWeedIE
   from .vidme import VidmeIE
+ +from .vidzi import VidziIE
+ +from .vier import VierIE, VierVideosIE
   from .vimeo import (
       VimeoIE,
       VimeoAlbumIE,
@@@ -496,13 -439,9 +497,13 @@@ from .vine import 
       VineUserIE,
   )
   from .viki import VikiIE
- -from .vk import VKIE
+ +from .vk import (
+ +    VKIE,
+ +    VKUserVideosIE,
+ +)
   from .vodlocker import VodlockerIE
   from .vporn import VpornIE
+ +from .vrt import VRTIE
   from .vube import VubeIE
   from .vuclip import VuClipIE
   from .vulture import VultureIE
@@@ -515,7 -454,6 +516,7 @@@ from .wdr import 
       WDRMobileIE,
       WDRMausIE,
   )
+ +from .webofstories import WebOfStoriesIE
   from .weibo import WeiboIE
   from .wimp import WimpIE
   from .wistia import WistiaIE
@@@ -524,16 -462,13 +525,16 @@@ from .wrzuta import WrzutaI
   from .xbef import XBefIE
   from .xboxclips import XboxClipsIE
   from .xhamster import XHamsterIE
+ +from .xminus import XMinusIE
   from .xnxx import XNXXIE
   from .xvideos import XVideosIE
   from .xtube import XTubeUserIE, XTubeIE
+ +from .xxxymovies import XXXYMoviesIE
   from .yahoo import (
       YahooIE,
       YahooSearchIE,
   )
+ +from .yesjapan import YesJapanIE
   from .ynet import YnetIE
   from .youjizz import YouJizzIE
   from .youku import YoukuIE
@@@ -551,16 -486,14 +552,16 @@@ from .youtube import 
       YoutubeSearchURLIE,
       YoutubeShowIE,
       YoutubeSubscriptionsIE,
- -    YoutubeTopListIE,
+ +    YoutubeTruncatedIDIE,
       YoutubeTruncatedURLIE,
       YoutubeUserIE,
       YoutubeWatchLaterIE,
   )
- -
- -from .zdf import ZDFIE
- -
+ +from .zdf import ZDFIE, ZDFChannelIE
+ +from .zingmp3 import (
+ +    ZingMp3SongIE,
+ +    ZingMp3AlbumIE,
+ +)
   
   _ALL_CLASSES = [
       klass
@@@ -577,17 -510,6 +578,17 @@@ def gen_extractors()
       return [klass() for klass in _ALL_CLASSES]
   
   
+ +def list_extractors(age_limit):
+ +    """
+ +    Return a list of extractors that are suitable for the given age,
+ +    sorted by extractor ID.
+ +    """
+ +
+ +    return sorted(
+ +        filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
+ +        key=lambda ie: ie.IE_NAME.lower())
+ +
+ +
   def get_info_extractor(ie_name):
       """Returns the info extractor class with the given ie_name"""
- -    return globals()[ie_name+'IE']
+ +    return globals()[ie_name + 'IE']
diff --combined youtube_dl/extractor/common.py

index 363e2000c18f283b70085139f4ac2b4e09fcb99c,a8674dd85d705e4e3dff57e352749dba23b6e0ab..03f3f18c83012cdced0e305fe1cc02d69a85bb7c
--- 1/youtube_dl/extractor/common.py
--- 2/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@@ -12,16 -12,13 +12,16 @@@ import sy
   import time
   import xml.etree.ElementTree
   
- -from ..utils import (
+ +from ..compat import (
+ +    compat_cookiejar,
       compat_http_client,
       compat_urllib_error,
       compat_urllib_parse_urlparse,
       compat_urlparse,
       compat_str,
- -
+ +)
+ +from ..utils import (
+ +    age_restricted,
       clean_html,
       compiled_regex_type,
       ExtractorError,
@@@ -41,15 -38,11 +41,15 @@@ class InfoExtractor(object)
       information about the video (or videos) the URL refers to. This
       information includes the real video URL, the video title, author and
       others. The information is stored in a dictionary which is then
- -    passed to the FileDownloader. The FileDownloader processes this
+ +    passed to the YoutubeDL. The YoutubeDL processes this
       information possibly downloading the video to the file system, among
       other possible outcomes.
   
- -    The dictionaries must include the following fields:
+ +    The type field determines the type of the result.
+ +    By far the most common value (and the default if _type is missing) is
+ +    "video", which indicates a single video.
+ +
+ +    For a video, the dictionaries must include the following fields:
   
       id:             Video identifier.
       title:          Video title, unescaped.
@@@ -79,7 -72,6 +79,7 @@@
                       * acodec     Name of the audio codec in use
                       * asr        Audio sampling rate in Hertz
                       * vbr        Average video bitrate in KBit/s
+ +                    * fps        Frame rate
                       * vcodec     Name of the video codec in use
                       * container  Name of the container format
                       * filesize   The number of bytes, if known in advance
@@@ -93,30 -85,16 +93,30 @@@
                                    by this field, regardless of all other values.
                                    -1 for default (order by other properties),
                                    -2 or smaller for less than default.
+ +                                 < -1000 to hide the format (if there is
+ +                                    another one which is strictly better)
+ +                    * language_preference  Is this in the correct requested
+ +                                 language?
+ +                                 10 if it's what the URL is about,
+ +                                 -1 for default (don't know),
+ +                                 -10 otherwise, other values reserved for now.
                       * quality    Order number of the video quality of this
                                    format, irrespective of the file format.
                                    -1 for default (order by other properties),
                                    -2 or smaller for less than default.
+ +                    * source_preference  Order number for this video source
+ +                                  (quality takes higher priority)
+ +                                 -1 for default (order by other properties),
+ +                                 -2 or smaller for less than default.
                       * http_referer  HTTP Referer header value to set.
                       * http_method  HTTP method to use for the download.
                       * http_headers  A dictionary of additional HTTP headers
                                    to add to the request.
                       * http_post_data  Additional data to send with a POST
                                    request.
+ +                    * stretched_ratio  If given and not 1, indicates that the
+ +                                       video's pixels are not square.
+ +                                       width : height ratio as float.
       url:            Final video URL.
       ext:            Video filename extension.
       format:         The video format, defaults to ext (used for --get-format)
@@@ -124,7 -102,6 +124,7 @@@
   
       The following fields are optional:
   
+ +    alt_title:      A secondary title of the video.
       display_id      An alternative identifier for the video, not necessarily
                       unique, but available before title. Typically, id is
                       something like "4234987", title "Dancing naked mole rats",
@@@ -136,7 -113,7 +136,7 @@@
                           * "resolution" (optional, string "{width}x{height}",
                                           deprecated)
       thumbnail:      Full URL to a video thumbnail image.
- -    description:    One-line video description.
+ +    description:    Full video description.
       uploader:       Full name of the video uploader.
       timestamp:      UNIX timestamp of the moment the video became available.
       upload_date:    Video upload date (YYYYMMDD).
@@@ -150,17 -127,6 +150,17 @@@
       like_count:     Number of positive ratings of the video
       dislike_count:  Number of negative ratings of the video
       comment_count:  Number of comments on the video
+ +    comments:       A list of comments, each with one or more of the following
+ +                    properties (all but one of text or html optional):
+ +                        * "author" - human-readable name of the comment author
+ +                        * "author_id" - user ID of the comment author
+ +                        * "id" - Comment ID
+ +                        * "html" - Comment as HTML
+ +                        * "text" - Plain text of the comment
+ +                        * "timestamp" - UNIX timestamp of comment
+ +                        * "parent" - ID of the comment this one is replying to.
+ +                                     Set to "root" to indicate that this is a
+ +                                     comment to the original video.
       age_limit:      Age restriction for the video, as an integer (years)
       webpage_url:    The url to the video webpage, if given to youtube-dl it
                       should allow to get the same result again. (It will be set
@@@ -174,39 -140,6 +174,39 @@@
   
       Unless mentioned otherwise, None is equivalent to absence of information.
   
+ +
+ +    _type "playlist" indicates multiple videos.
+ +    There must be a key "entries", which is a list, an iterable, or a PagedList
+ +    object, each element of which is a valid dictionary by this specification.
+ +
+ +    Additionally, playlists can have "title" and "id" attributes with the same
+ +    semantics as videos (see above).
+ +
+ +
+ +    _type "multi_video" indicates that there are multiple videos that
+ +    form a single show, for example, multiple acts of an opera or TV episode.
+ +    It must have an entries key like a playlist and contain all the keys
+ +    required for a video at the same time.
+ +
+ +
+ +    _type "url" indicates that the video must be extracted from another
+ +    location, possibly by a different extractor. Its only required key is:
+ +    "url" - the next URL to extract.
+ +    The key "ie_key" can be set to the class name (minus the trailing "IE",
+ +    e.g. "Youtube") if the extractor class is known in advance.
+ +    Additionally, the dictionary may have any properties of the resolved entity
+ +    known in advance, for example "title" if the title of the referred video is
+ +    known ahead of time.
+ +
+ +
+ +    _type "url_transparent" entities have the same specification as "url", but
+ +    indicate that the given additional information is more precise than the one
+ +    associated with the resolved URL.
+ +    This is useful when a site employs a video service that hosts the video and
+ +    its technical metadata, but that video service does not embed a useful
+ +    title, description etc.
+ +
+ +
       Subclasses of this one should re-define the _real_initialize() and
       _real_extract() methods and define a _VALID_URL regexp.
       Probably, they should also be added to the list of extractors.
@@@ -305,6 -238,7 +305,6 @@@
   
       def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
           """ Returns a tuple (page content as string, URL handle) """
- -
           # Strip hashes from the URL (#1038)
           if isinstance(url_or_request, (compat_str, str)):
               url_or_request = url_or_request.partition('#')[0]
@@@ -313,14 -247,8 +313,14 @@@
           if urlh is False:
               assert not fatal
               return False
+ +        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+ +        return (content, urlh)
+ +
+ +    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
           content_type = urlh.headers.get('Content-Type', '')
           webpage_bytes = urlh.read()
+ +        if prefix is not None:
+ +            webpage_bytes = prefix + webpage_bytes
           m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
           if m:
               encoding = m.group(1)
@@@ -377,21 -305,11 +377,21 @@@
                   msg += ' Visit %s for more details' % blocked_iframe
               raise ExtractorError(msg, expected=True)
   
- -        return (content, urlh)
+ +        return content
   
- -    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+ +    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
           """ Returns the data of the page as a string """
- -        res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+ +        success = False
+ +        try_count = 0
+ +        while success is False:
+ +            try:
+ +                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+ +                success = True
+ +            except compat_http_client.IncompleteRead as e:
+ +                try_count += 1
+ +                if try_count >= tries:
+ +                    raise e
+ +                self._sleep(timeout, video_id)
           if res is False:
               return res
           else:
@@@ -419,10 -337,6 +419,10 @@@
               url_or_request, video_id, note, errnote, fatal=fatal)
           if (not fatal) and json_string is False:
               return None
+ +        return self._parse_json(
+ +            json_string, video_id, transform_source=transform_source, fatal=fatal)
+ +
+ +    def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
           if transform_source:
               json_string = transform_source(json_string)
           try:
@@@ -459,20 -373,19 +459,20 @@@
           """Report attempt to log in."""
           self.to_screen('Logging in')
   
- -    #Methods for following #608
+ +    # Methods for following #608
       @staticmethod
       def url_result(url, ie=None, video_id=None):
           """Returns a url that points to a page that should be processed"""
- -        #TODO: ie should be the class used for getting the info
+ +        # TODO: ie should be the class used for getting the info
           video_info = {'_type': 'url',
                         'url': url,
                         'ie_key': ie}
           if video_id is not None:
               video_info['id'] = video_id
           return video_info
+ +
       @staticmethod
- -    def playlist_result(entries, playlist_id=None, playlist_title=None):
+ +    def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
           """Returns a playlist"""
           video_info = {'_type': 'playlist',
                         'entries': entries}
@@@ -480,11 -393,9 +480,11 @@@
               video_info['id'] = playlist_id
           if playlist_title:
               video_info['title'] = playlist_title
+ +        if playlist_description:
+ +            video_info['description'] = playlist_description
           return video_info
   
- -    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
+ +    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
           """
           Perform a regex search on the given string, using a single or a list of
           patterns returning the first matching group.
@@@ -505,25 -416,22 +505,25 @@@
               _name = name
   
           if mobj:
- -            # return the first matching group
- -            return next(g for g in mobj.groups() if g is not None)
+ +            if group is None:
+ +                # return the first matching group
+ +                return next(g for g in mobj.groups() if g is not None)
+ +            else:
+ +                return mobj.group(group)
           elif default is not _NO_DEFAULT:
               return default
           elif fatal:
               raise RegexNotFoundError('Unable to extract %s' % _name)
           else:
               self._downloader.report_warning('unable to extract %s; '
- -                'please report this issue on http://yt-dl.org/bug' % _name)
+ +                                            'please report this issue on http://yt-dl.org/bug' % _name)
               return None
   
- -    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
+ +    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
           """
           Like _search_regex, but strips HTML tags and unescapes entities.
           """
- -        res = self._search_regex(pattern, string, name, default, fatal, flags)
+ +        res = self._search_regex(pattern, string, name, default, fatal, flags, group)
           if res:
               return clean_html(res).strip()
           else:
@@@ -556,7 -464,7 +556,7 @@@
                       raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
               except (IOError, netrc.NetrcParseError) as err:
                   self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))
- -        
+ +
           return (username, password)
   
       def _get_tfa_info(self):
@@@ -616,10 -524,10 +616,10 @@@
           if display_name is None:
               display_name = name
           return self._html_search_regex(
- -            r'''(?ix)<meta
- -                    (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
- -                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
- -            html, display_name, fatal=fatal, **kwargs)
+ +            r'''(?isx)<meta
+ +                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
+ +                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
+ +            html, display_name, fatal=fatal, group='content', **kwargs)
   
       def _dc_search_uploader(self, html):
           return self._html_search_meta('dc.creator', html, 'uploader')
@@@ -650,7 -558,7 +650,7 @@@
   
       def _twitter_search_player(self, html):
           return self._html_search_meta('twitter:player', html,
- -            'twitter card player')
+ +                                      'twitter card player')
   
       def _sort_formats(self, formats):
           if not formats:
@@@ -695,7 -603,6 +695,7 @@@
   
               return (
                   preference,
+ +                f.get('language_preference') if f.get('language_preference') is not None else -1,
                   f.get('quality') if f.get('quality') is not None else -1,
                   f.get('height') if f.get('height') is not None else -1,
                   f.get('width') if f.get('width') is not None else -1,
@@@ -704,16 -611,14 +704,16 @@@
                   f.get('vbr') if f.get('vbr') is not None else -1,
                   f.get('abr') if f.get('abr') is not None else -1,
                   audio_ext_preference,
+ +                f.get('fps') if f.get('fps') is not None else -1,
                   f.get('filesize') if f.get('filesize') is not None else -1,
                   f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
+ +                f.get('source_preference') if f.get('source_preference') is not None else -1,
                   f.get('format_id'),
               )
           formats.sort(key=_formats_key)
   
       def http_scheme(self):
- -        """ Either "https:" or "https:", depending on the user's preferences """
+ +        """ Either "http:" or "https:", depending on the user's preferences """
           return (
               'http:'
               if self._downloader.params.get('prefer_insecure', False)
@@@ -742,8 -647,14 +742,14 @@@
               'Unable to download f4m manifest')
   
           formats = []
+         manifest_version = '1.0'
           media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
+         if not media_nodes:
+             manifest_version = '2.0'
+             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
           for i, media_el in enumerate(media_nodes):
+             if manifest_version == '2.0':
+                 manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
               tbr = int_or_none(media_el.attrib.get('bitrate'))
               format_id = 'f4m-%d' % (i if tbr is None else tbr)
               formats.append({
@@@ -776,10 -687,7 +782,10 @@@
               if re.match(r'^https?://', u)
               else compat_urlparse.urljoin(m3u8_url, u))
   
- -        m3u8_doc = self._download_webpage(m3u8_url, video_id)
+ +        m3u8_doc = self._download_webpage(
+ +            m3u8_url, video_id,
+ +            note='Downloading m3u8 information',
+ +            errnote='Failed to download m3u8 information')
           last_info = None
           kv_rex = re.compile(
               r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
@@@ -825,49 -733,6 +831,49 @@@
           self._sort_formats(formats)
           return formats
   
+ +    # TODO: improve extraction
+ +    def _extract_smil_formats(self, smil_url, video_id):
+ +        smil = self._download_xml(
+ +            smil_url, video_id, 'Downloading SMIL file',
+ +            'Unable to download SMIL file')
+ +
+ +        base = smil.find('./head/meta').get('base')
+ +
+ +        formats = []
+ +        rtmp_count = 0
+ +        for video in smil.findall('./body/switch/video'):
+ +            src = video.get('src')
+ +            if not src:
+ +                continue
+ +            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+ +            width = int_or_none(video.get('width'))
+ +            height = int_or_none(video.get('height'))
+ +            proto = video.get('proto')
+ +            if not proto:
+ +                if base:
+ +                    if base.startswith('rtmp'):
+ +                        proto = 'rtmp'
+ +                    elif base.startswith('http'):
+ +                        proto = 'http'
+ +            ext = video.get('ext')
+ +            if proto == 'm3u8':
+ +                formats.extend(self._extract_m3u8_formats(src, video_id, ext))
+ +            elif proto == 'rtmp':
+ +                rtmp_count += 1
+ +                streamer = video.get('streamer') or base
+ +                formats.append({
+ +                    'url': streamer,
+ +                    'play_path': src,
+ +                    'ext': 'flv',
+ +                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+ +                    'tbr': bitrate,
+ +                    'width': width,
+ +                    'height': height,
+ +                })
+ +        self._sort_formats(formats)
+ +
+ +        return formats
+ +
       def _live_title(self, name):
           """ Generate the title for a live video """
           now = datetime.datetime.now()
@@@ -896,41 -761,6 +902,41 @@@
                   self._downloader.report_warning(msg)
           return res
   
+ +    def _set_cookie(self, domain, name, value, expire_time=None):
+ +        cookie = compat_cookiejar.Cookie(
+ +            0, name, value, None, None, domain, None,
+ +            None, '/', True, False, expire_time, '', None, None, None)
+ +        self._downloader.cookiejar.set_cookie(cookie)
+ +
+ +    def get_testcases(self, include_onlymatching=False):
+ +        t = getattr(self, '_TEST', None)
+ +        if t:
+ +            assert not hasattr(self, '_TESTS'), \
+ +                '%s has _TEST and _TESTS' % type(self).__name__
+ +            tests = [t]
+ +        else:
+ +            tests = getattr(self, '_TESTS', [])
+ +        for t in tests:
+ +            if not include_onlymatching and t.get('only_matching', False):
+ +                continue
+ +            t['name'] = type(self).__name__[:-len('IE')]
+ +            yield t
+ +
+ +    def is_suitable(self, age_limit):
+ +        """ Test whether the extractor is generally suitable for the given
+ +        age limit (i.e. pornographic sites are not, all others usually are) """
+ +
+ +        any_restricted = False
+ +        for tc in self.get_testcases(include_onlymatching=False):
+ +            if 'playlist' in tc:
+ +                tc = tc['playlist'][0]
+ +            is_restricted = age_restricted(
+ +                tc.get('info_dict', {}).get('age_limit'), age_limit)
+ +            if not is_restricted:
+ +                return True
+ +            any_restricted = any_restricted or is_restricted
+ +        return not any_restricted
+ +
   
   class SearchInfoExtractor(InfoExtractor):
       """
author	Philipp Hagemeister <phihag@phihag.de>
	Sat, 10 Jan 2015 16:51:52 +0000 (17:51 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Sat, 10 Jan 2015 16:51:52 +0000 (17:51 +0100)
		1	2
youtube_dl/extractor/__init__.py	patch |	diff1 |	diff2 |	blob | history
youtube_dl/extractor/common.py	patch |	diff1 |	diff2 |	blob | history