X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=f64b88d55c187c7c9dfa9ef874136e3fbb98387c;hb=a130bc6d024e9bfa3c7f8742f8bf5038b2c6e363;hp=cea30dad81fa4224a848732159aa19684c7d5dbc;hpb=1a9c655e3b1569f315d4193e877cba0b4a863c63;p=youtube-dl diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index cea30dad8..f64b88d55 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -2,26 +2,17 @@ # -*- coding: utf-8 -*- import datetime -import HTMLParser -import httplib import netrc import os import re import socket import time -import urllib -import urllib2 import email.utils import xml.etree.ElementTree import random import math from urlparse import parse_qs -try: - import cStringIO as StringIO -except ImportError: - import StringIO - from utils import * @@ -29,37 +20,48 @@ class InfoExtractor(object): """Information Extractor class. Information extractors are the classes that, given a URL, extract - information from the video (or videos) the URL refers to. This - information includes the real video URL, the video title and simplified - title, author and others. The information is stored in a dictionary - which is then passed to the FileDownloader. The FileDownloader - processes this information possibly downloading the video to the file - system, among other possible outcomes. The dictionaries must include - the following fields: - - id: Video identifier. - url: Final video URL. - uploader: Nickname of the video uploader. - title: Literal title. - ext: Video filename extension. - format: Video format. - player_url: SWF Player URL (may be None). - - The following fields are optional. Their primary purpose is to allow - youtube-dl to serve as the backend for a video search function, such - as the one in youtube2mp3. They are only used when their respective - forced printing functions are called: - - thumbnail: Full URL to a video thumbnail image. - description: One-line video description. + information about the video (or videos) the URL refers to. This + information includes the real video URL, the video title, author and + others. The information is stored in a dictionary which is then + passed to the FileDownloader. The FileDownloader processes this + information possibly downloading the video to the file system, among + other possible outcomes. + + The dictionaries must include the following fields: + + id: Video identifier. + url: Final video URL. + uploader: Nickname of the video uploader, unescaped. + upload_date: Video upload date (YYYYMMDD). + title: Video title, unescaped. + ext: Video filename extension. + + The following fields are optional: + + format: The video format, defaults to ext (used for --get-format) + thumbnail: Full URL to a video thumbnail image. + description: One-line video description. + player_url: SWF Player URL (used for rtmpdump). + subtitles: The .srt file contents. + urlhandle: [internal] The urlHandle to be used to download the file, + like returned by urllib.request.urlopen + + The fields should all be Unicode strings. Subclasses of this one should re-define the _real_initialize() and _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. + + _real_extract() must return a *list* of information dictionaries as + described above. + + Finally, the _WORKING attribute should be set to False for broken IEs + in order to warn the users and skip the tests. """ _ready = False _downloader = None + _WORKING = True def __init__(self, downloader=None): """Constructor. Receives an optional downloader.""" @@ -70,6 +72,10 @@ class InfoExtractor(object): """Receives a URL and returns True if suitable for this IE.""" return re.match(self._VALID_URL, url) is not None + def working(self): + """Getter method for _WORKING.""" + return self._WORKING + def initialize(self): """Initializes an instance (authentication, etc).""" if not self._ready: @@ -237,16 +243,16 @@ class YoutubeIE(InfoExtractor): password = info[2] else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) - except (IOError, netrc.NetrcParseError), err: + except (IOError, netrc.NetrcParseError) as err: self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) return # Set language - request = urllib2.Request(self._LANG_URL) + request = compat_urllib_request.Request(self._LANG_URL) try: self.report_lang() - urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err)) return @@ -262,14 +268,14 @@ class YoutubeIE(InfoExtractor): 'username': username, 'password': password, } - request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) + request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) try: self.report_login() - login_results = urllib2.urlopen(request).read() + login_results = compat_urllib_request.urlopen(request).read() if re.search(r'(?i)