From: Philipp Hagemeister Date: Fri, 3 Jan 2014 11:20:18 +0000 (+0100) Subject: Merge remote-tracking branch 'diffycat/jpopsuki' X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=d2fee313ec71ad8a4d8b57ec9f433210ee01056b;hp=4a9c9b6fdba3782e6342d43769d5dd777797d640;p=youtube-dl Merge remote-tracking branch 'diffycat/jpopsuki' --- diff --git a/README.md b/README.md index 91e18e372..0070617d4 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,11 @@ which means you can modify it, redistribute it or use it however you like. empty string (--proxy "") for direct connection --no-check-certificate Suppress HTTPS certificate validation. --cache-dir DIR Location in the filesystem where youtube-dl can - store downloaded information permanently. By + store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache - /youtube-dl . + /youtube-dl . At the moment, only YouTube player + files (for videos with obfuscated signatures) are + cached, but that may change. --no-cache-dir Disable filesystem caching --bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi @@ -335,3 +337,7 @@ In particular, every site support request issue should only pertain to services ### Is anyone going to need the feature? Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. + +### Is your question about youtube-dl? + +It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug. diff --git a/test/test_playlists.py b/test/test_playlists.py index 1b7b4e3d8..9d522b357 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -28,7 +28,8 @@ from youtube_dl.extractor import ( BandcampAlbumIE, SmotriCommunityIE, SmotriUserIE, - IviCompilationIE + IviCompilationIE, + ImdbListIE, ) @@ -187,6 +188,15 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], u'dezhurnyi_angel/season2') self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон') self.assertTrue(len(result['entries']) >= 20) + + def test_imdb_list(self): + dl = FakeYDL() + ie = ImdbListIE(dl) + result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], u'sMjedvGDd8U') + self.assertEqual(result['title'], u'Animated and Family Films') + self.assertTrue(len(result['entries']) >= 48) if __name__ == '__main__': diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a9a3639d7..08037deda 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -148,6 +148,7 @@ class YoutubeDL(object): socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi + debug_printtraffic:Print out sent and received HTTP traffic The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -164,6 +165,8 @@ class YoutubeDL(object): def __init__(self, params=None): """Create a FileDownloader object with the given options.""" + if params is None: + params = {} self._ies = [] self._ies_instances = {} self._pps = [] @@ -172,7 +175,7 @@ class YoutubeDL(object): self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr - self.params = {} if params is None else params + self.params = params if params.get('bidi_workaround', False): try: @@ -1014,7 +1017,7 @@ class YoutubeDL(object): def list_formats(self, info_dict): def format_note(fdict): res = u'' - if f.get('ext') in ['f4f', 'f4m']: + if fdict.get('ext') in ['f4f', 'f4m']: res += u'(unsupported) ' if fdict.get('format_note') is not None: res += fdict['format_note'] + u' ' @@ -1124,10 +1127,13 @@ class YoutubeDL(object): if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] proxy_handler = compat_urllib_request.ProxyHandler(proxies) + + debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler( - self.params.get('nocheckcertificate', False)) + self.params.get('nocheckcertificate', False), debuglevel=debuglevel) + ydlh = YoutubeDLHandler(debuglevel=debuglevel) opener = compat_urllib_request.build_opener( - https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) + https_handler, proxy_handler, cookie_processor, ydlh) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play # (See https://github.com/rg3/youtube-dl/issues/1309 for details) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 657e3fd07..b29cf6758 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -186,7 +186,7 @@ def parseOpts(overrideArguments=None): general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', - help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .') + help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') general.add_option( '--no-cache-dir', action='store_const', const=None, dest='cachedir', help='Disable filesystem caching') @@ -334,7 +334,9 @@ def parseOpts(overrideArguments=None): verbosity.add_option('--youtube-print-sig-code', action='store_true', dest='youtube_print_sig_code', default=False, help=optparse.SUPPRESS_HELP) - + verbosity.add_option('--print-traffic', + dest='debug_printtraffic', action='store_true', default=False, + help=optparse.SUPPRESS_HELP) filesystem.add_option('-t', '--title', action='store_true', dest='usetitle', help='use title in file name (default)', default=False) @@ -696,6 +698,7 @@ def _real_main(argv=None): 'proxy': opts.proxy, 'socket_timeout': opts.socket_timeout, 'bidi_workaround': opts.bidi_workaround, + 'debug_printtraffic': opts.debug_printtraffic, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 14b88efd3..8407727ba 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -133,7 +133,7 @@ class HttpFD(FileDownloader): return False try: stream.write(data_block) - except (IOError, OSError): + except (IOError, OSError) as err: self.to_stderr(u"\n") self.report_error(u'unable to write data: %s' % str(err)) return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b59110b15..9c1374373 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -28,6 +28,7 @@ from .channel9 import Channel9IE from .cinemassacre import CinemassacreIE from .clipfish import ClipfishIE from .clipsyndicate import ClipsyndicateIE +from .cmt import CMTIE from .cnn import CNNIE from .collegehumor import CollegeHumorIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE @@ -79,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE from .howcast import HowcastIE from .hypem import HypemIE from .ign import IGNIE, OneUPIE -from .imdb import ImdbIE +from .imdb import ( + ImdbIE, + ImdbListIE +) from .ina import InaIE from .infoq import InfoQIE from .instagram import InstagramIE diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py new file mode 100644 index 000000000..88e0e9aba --- /dev/null +++ b/youtube_dl/extractor/cmt.py @@ -0,0 +1,19 @@ +from .mtv import MTVIE + +class CMTIE(MTVIE): + IE_NAME = u'cmt.com' + _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P[^/]+)\.jhtml' + _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' + + _TESTS = [ + { + u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', + u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2', + u'info_dict': { + u'id': u'989124', + u'ext': u'mp4', + u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"', + u'description': u'Blame It All On My Roots', + }, + }, + ] diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index a54ce3ee7..27bd8256e 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -12,7 +12,9 @@ from ..utils import ( class ComedyCentralIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P.*)' + _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/ + (video-clips|episodes|cc-studios|video-collections) + /(?P<title>.*)''' _FEED_URL = u'http://comedycentral.com/feeds/mrss/' _TEST = { diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f34d36cb0..f7247752e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -69,7 +69,8 @@ class InfoExtractor(object): download, lower-case. "http", "https", "rtsp", "rtmp" or so. * preference Order number of this format. If this field is - present, the formats get sorted by this field. + present and not None, the formats get sorted + by this field. -1 for default (order by other properties), -2 or smaller for less than default. url: Final video URL. diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 416e25156..0b11d1f10 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -10,11 +10,11 @@ from ..utils import ( class DreiSatIE(InfoExtractor): IE_NAME = '3sat' - _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' + _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _TEST = { u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", - u'file': u'36983.webm', - u'md5': u'57c97d0469d71cf874f6815aa2b7c944', + u'file': u'36983.mp4', + u'md5': u'9dcfe344732808dbfcc901537973c922', u'info_dict': { u"title": u"Kaffeeland Schweiz", u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...", diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7a14c98f9..377ae91c4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -162,6 +162,8 @@ class GenericIE(InfoExtractor): return self.url_result('http://' + url) video_id = os.path.splitext(url.split('/')[-1])[0] + self.to_screen(u'%s: Requesting header' % video_id) + try: response = self._send_head(url) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index e5332cce8..16926b4d3 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor): 'description': descr, 'thumbnail': format_info['slate'], } + +class ImdbListIE(InfoExtractor): + IE_NAME = u'imdb:list' + IE_DESC = u'Internet Movie Database lists' + _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + list_id = mobj.group('id') + + # RSS XML is sometimes malformed + rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, u'Downloading list RSS') + list_title = self._html_search_regex(r'<title>(.*?)', rss, u'list title') + + # Export is independent of actual author_id, but returns 404 if no author_id is provided. + # However, passing dummy author_id seems to be enough. + csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id, + list_id, u'Downloading list CSV') + + entries = [] + for item in csv.split('\n')[1:]: + cols = item.split(',') + if len(cols) < 2: + continue + item_id = cols[1][1:-1] + if item_id.startswith('vi'): + entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb')) + + return self.playlist_result(entries, list_id, list_title) \ No newline at end of file diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 125d81551..7c54ea0f4 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -53,7 +53,7 @@ class MixcloudIE(InfoExtractor): info = json.loads(json_data) preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url') - song_url = preview_url.replace('/previews/', '/cloudcasts/originals/') + song_url = preview_url.replace('/previews/', '/c/originals/') template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) final_song_url = self._get_url(template_url) if final_song_url is None: diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index ed11f521a..f1cf41e2d 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -129,7 +129,7 @@ class MTVIE(MTVServicesInfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') - uri = mobj.group('mgid') + uri = mobj.groupdict().get('mgid') if uri is None: webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index e22ff9c38..951e977bd 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor): (?!sets/)(?P[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) - |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*) + |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) ) ''' IE_NAME = u'soundcloud' @@ -193,7 +193,7 @@ class SoundcloudIE(InfoExtractor): if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID full_title = track_id - elif mobj.group('widget'): + elif mobj.group('player'): query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) return self.url_result(query['url'][0], ie='Soundcloud') else: diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 584550455..bc31c2e64 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -44,6 +44,7 @@ class WistiaIE(InfoExtractor): 'height': a['height'], 'filesize': a['size'], 'ext': a['ext'], + 'preference': 1 if atype == 'original' else None, }) self._sort_formats(formats) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b0e29c2a8..9424d5e26 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -194,6 +194,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40}, '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40}, + '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, # Dash mp4 audio '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 83a274043..da5143c8e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -539,7 +539,8 @@ def formatSeconds(secs): else: return '%d' % secs -def make_HTTPS_handler(opts_no_check_certificate): + +def make_HTTPS_handler(opts_no_check_certificate, **kwargs): if sys.version_info < (3, 2): import httplib @@ -560,7 +561,7 @@ def make_HTTPS_handler(opts_no_check_certificate): class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler): def https_open(self, req): return self.do_open(HTTPSConnectionV3, req) - return HTTPSHandlerV3() + return HTTPSHandlerV3(**kwargs) else: context = ssl.SSLContext(ssl.PROTOCOL_SSLv3) context.verify_mode = (ssl.CERT_NONE @@ -571,7 +572,7 @@ def make_HTTPS_handler(opts_no_check_certificate): context.load_default_certs() except AttributeError: pass # Python < 3.4 - return compat_urllib_request.HTTPSHandler(context=context) + return compat_urllib_request.HTTPSHandler(context=context, **kwargs) class ExtractorError(Exception): """Error during info extraction.""" diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 332913b31..bf5fc8212 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.12.26' +__version__ = '2014.01.03'