From: Philipp Hagemeister
Date: Fri, 3 Jan 2014 12:24:29 +0000 (+0100)
Subject: [lynda] minor changes
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=a7c26e7338ceed06b579775f315b078644a7482b;hp=c7f8537dd9752e83661b69e46a4f113222c7a503;p=youtube-dl

[lynda] minor changes
---

diff --git a/README.md b/README.md
index 91e18e372..0070617d4 100644
--- a/README.md
+++ b/README.md
@@ -34,9 +34,11 @@ which means you can modify it, redistribute it or use it however you like.
                                       empty string (--proxy "") for direct connection
     --no-check-certificate            Suppress HTTPS certificate validation.
     --cache-dir DIR                   Location in the filesystem where youtube-dl can
-                                      store downloaded information permanently. By
+                                      store some downloaded information permanently. By
                                       default $XDG_CACHE_HOME/youtube-dl or ~/.cache
-                                      /youtube-dl .
+                                      /youtube-dl . At the moment, only YouTube player
+                                      files (for videos with obfuscated signatures) are
+                                      cached, but that may change.
     --no-cache-dir                    Disable filesystem caching
     --bidi-workaround                 Work around terminals that lack bidirectional text
                                       support. Requires bidiv or fribidi
@@ -335,3 +337,7 @@ In particular, every site support request issue should only pertain to services
 ### Is anyone going to need the feature?
 
 Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+
+### Is your question about youtube-dl?
+
+It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
diff --git a/test/test_playlists.py b/test/test_playlists.py index 1b7b4e3d8..9d522b357 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -28,7 +28,8 @@ from youtube_dl.extractor import ( BandcampAlbumIE, SmotriCommunityIE, SmotriUserIE, - IviCompilationIE + IviCompilationIE, + ImdbListIE, ) @@ -187,6 +188,15 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], u'dezhurnyi_angel/season2') self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон') self.assertTrue(len(result['entries']) >= 20) + + def test_imdb_list(self): + dl = FakeYDL() + ie = ImdbListIE(dl) + result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], u'sMjedvGDd8U') + self.assertEqual(result['title'], u'Animated and Family Films') + self.assertTrue(len(result['entries']) >= 48) if __name__ == '__main__': diff --git a/test/test_utils.py b/test/test_utils.py index e5778cd83..bee355ee0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -18,6 +18,7 @@ from youtube_dl.utils import ( find_xpath_attr, get_meta_content, orderedSet, + parse_duration, sanitize_filename, shell_quote, smuggle_url, @@ -192,5 +193,12 @@ class TestUtil(unittest.TestCase): url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'), u'trailer.mp4') + def test_parse_duration(self): + self.assertEqual(parse_duration(None), None) + self.assertEqual(parse_duration('1'), 1) + self.assertEqual(parse_duration('1337:12'), 80232) + self.assertEqual(parse_duration('9:12:43'), 33163) + self.assertEqual(parse_duration('x:y'), None) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index 69aedf87a..097e1a9e4 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -10,6 +10,7 @@ from .utils import ( PostProcessingError, shell_quote, subtitles_filename, + prepend_extension, ) @@ -496,13 +497,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor): return True, info filename = info['filepath'] - ext = os.path.splitext(filename)[1][1:] - temp_filename = filename + u'.temp' + temp_filename = prepend_extension(filename, 'temp') options = ['-c', 'copy'] for (name, value) in metadata.items(): options.extend(['-metadata', '%s=%s' % (name, value)]) - options.extend(['-f', ext]) self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename) self.run_ffmpeg(filename, temp_filename, options) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a9a3639d7..08037deda 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -148,6 +148,7 @@ class YoutubeDL(object): socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi + debug_printtraffic:Print out sent and received HTTP traffic The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -164,6 +165,8 @@ class YoutubeDL(object): def __init__(self, params=None): """Create a FileDownloader object with the given options.""" + if params is None: + params = {} self._ies = [] self._ies_instances = {} self._pps = [] @@ -172,7 +175,7 @@ class YoutubeDL(object): self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr - self.params = {} if params is None else params + self.params = params if params.get('bidi_workaround', False): try: @@ -1014,7 +1017,7 @@ class 
YoutubeDL(object): def list_formats(self, info_dict): def format_note(fdict): res = u'' - if f.get('ext') in ['f4f', 'f4m']: + if fdict.get('ext') in ['f4f', 'f4m']: res += u'(unsupported) ' if fdict.get('format_note') is not None: res += fdict['format_note'] + u' ' @@ -1124,10 +1127,13 @@ class YoutubeDL(object): if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] proxy_handler = compat_urllib_request.ProxyHandler(proxies) + + debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler( - self.params.get('nocheckcertificate', False)) + self.params.get('nocheckcertificate', False), debuglevel=debuglevel) + ydlh = YoutubeDLHandler(debuglevel=debuglevel) opener = compat_urllib_request.build_opener( - https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) + https_handler, proxy_handler, cookie_processor, ydlh) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play # (See https://github.com/rg3/youtube-dl/issues/1309 for details) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c37d28c59..b29cf6758 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -44,6 +44,7 @@ __license__ = 'Public Domain' import codecs import getpass +import locale import optparse import os import random @@ -185,7 +186,7 @@ def parseOpts(overrideArguments=None): general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', - help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .') + help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . 
At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') general.add_option( '--no-cache-dir', action='store_const', const=None, dest='cachedir', help='Disable filesystem caching') @@ -333,7 +334,9 @@ def parseOpts(overrideArguments=None): verbosity.add_option('--youtube-print-sig-code', action='store_true', dest='youtube_print_sig_code', default=False, help=optparse.SUPPRESS_HELP) - + verbosity.add_option('--print-traffic', + dest='debug_printtraffic', action='store_true', default=False, + help=optparse.SUPPRESS_HELP) filesystem.add_option('-t', '--title', action='store_true', dest='usetitle', help='use title in file name (default)', default=False) @@ -473,6 +476,8 @@ def parseOpts(overrideArguments=None): write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') + write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' % + (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding())) return parser, opts, args @@ -693,6 +698,7 @@ def _real_main(argv=None): 'proxy': opts.proxy, 'socket_timeout': opts.socket_timeout, 'bidi_workaround': opts.bidi_workaround, + 'debug_printtraffic': opts.debug_printtraffic, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 14b88efd3..8407727ba 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -133,7 +133,7 @@ class HttpFD(FileDownloader): return False try: stream.write(data_block) - except (IOError, OSError): + except (IOError, OSError) as err: self.to_stderr(u"\n") self.report_error(u'unable to write data: %s' % str(err)) return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2caa078b5..21d564dba 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -28,6 +28,7 @@ from .channel9 import Channel9IE from .cinemassacre import CinemassacreIE from .clipfish import ClipfishIE from .clipsyndicate import ClipsyndicateIE +from .cmt import CMTIE from .cnn import CNNIE from .collegehumor import CollegeHumorIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE @@ -79,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE from .howcast import HowcastIE from .hypem import HypemIE from .ign import IGNIE, OneUPIE -from .imdb import ImdbIE +from .imdb import ( + ImdbIE, + ImdbListIE +) from .ina import InaIE from .infoq import InfoQIE from .instagram import InstagramIE @@ -91,6 +95,7 @@ from .ivi import ( from .jeuxvideo import JeuxVideoIE from .jukebox import JukeboxIE from .justintv import JustinTVIE +from .jpopsukitv import JpopsukiIE from .kankan import KankanIE from .keezmovies import KeezMoviesIE from .kickstarter import KickStarterIE @@ -101,6 +106,7 @@ from .lynda import ( LyndaIE, LyndaCourseIE ) +from .macgamestore import MacGameStoreIE from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 3a32c14c5..15aee2786 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -10,14 +10,14 @@ from ..utils import ( class BandcampIE(InfoExtractor): - IE_NAME = u'Bandcamp' _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P.*)' 
_TESTS = [{ u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', u'file': u'1812978515.mp3', - u'md5': u'cdeb30cdae1921719a3cbcab696ef53c', + u'md5': u'c557841d5e50261777a6585648adf439', u'info_dict': { - u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad" + u"title": u"youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", + u"duration": 10, }, u'skip': u'There is a limit of 200 free downloads / month for the test song' }] @@ -30,29 +30,42 @@ class BandcampIE(InfoExtractor): m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) if m_download is None: m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) - if m_trackinfo: - json_code = m_trackinfo.group(1) - data = json.loads(json_code) + if m_trackinfo: + json_code = m_trackinfo.group(1) + data = json.loads(json_code) + d = data[0] + + duration = int(round(d['duration'])) + formats = [] + for format_id, format_url in d['file'].items(): + ext, _, abr_str = format_id.partition('-') + + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'ext': format_id.partition('-')[0], + 'vcodec': 'none', + 'acodec': format_id.partition('-')[0], + 'abr': int(format_id.partition('-')[2]), + }) + + self._sort_formats(formats) - for d in data: - formats = [{ - 'format_id': 'format_id', - 'url': format_url, - 'ext': format_id.partition('-')[0] - } for format_id, format_url in sorted(d['file'].items())] return { 'id': compat_str(d['id']), 'title': d['title'], 'formats': formats, + 'duration': duration, } - else: - raise ExtractorError(u'No free songs found') + else: + raise ExtractorError(u'No free songs found') download_link = m_download.group(1) - id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', - webpage, re.MULTILINE|re.DOTALL).group('id') + video_id = re.search( + r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', + webpage, re.MULTILINE | re.DOTALL).group('id') - download_webpage = self._download_webpage(download_link, id, + download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') # We get the dictionary of the track from some javascrip code info = re.search(r'items: (.*?),$', @@ -66,21 +79,21 @@ class BandcampIE(InfoExtractor): m_url = re.match(re_url, initial_url) #We build the url we will use to get the final track url # This url is build in Bandcamp in the script download_bunde_*.js - request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts')) + request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url') # If we could correctly generate the .rand field the url would be #in the "download_url" key final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) - track_info = {'id':id, - 'title' : info[u'title'], - 'ext' : 'mp3', - 'url' : final_url, - 'thumbnail' : info[u'thumb_url'], - 'uploader' : info[u'artist'] - } - - return [track_info] + return { + 'id': video_id, + 'title': info[u'title'], + 'ext': 'mp3', + 'vcodec': 'none', + 'url': final_url, + 'thumbnail': info[u'thumb_url'], + 'uploader': info[u'artist'], + } class BandcampAlbumIE(InfoExtractor): @@ -117,7 +130,7 @@ class BandcampAlbumIE(InfoExtractor): webpage = self._download_webpage(url, title) tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', 
webpage) if not tracks_paths: - raise ExtractorError(u'The page doesn\'t contain any track') + raise ExtractorError(u'The page doesn\'t contain any tracks') entries = [ self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) for t_path in tracks_paths] diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 144ce64cc..0229840a3 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -61,9 +61,10 @@ class BlinkxIE(InfoExtractor): elif m['type'] in ('flv', 'mp4'): vcodec = remove_start(m['vcodec'], 'ff') acodec = remove_start(m['acodec'], 'ff') + tbr = (int(m['vbr']) + int(m['abr'])) // 1000 format_id = (u'%s-%sk-%s' % (vcodec, - (int(m['vbr']) + int(m['abr'])) // 1000, + tbr, m['w'])) formats.append({ 'format_id': format_id, @@ -72,10 +73,12 @@ class BlinkxIE(InfoExtractor): 'acodec': acodec, 'abr': int(m['abr']) // 1000, 'vbr': int(m['vbr']) // 1000, + 'tbr': tbr, 'width': int(m['w']), 'height': int(m['h']), }) - formats.sort(key=lambda f: (f['width'], f['vbr'], f['abr'])) + + self._sort_formats(formats) return { 'id': display_id, diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index ae70ea229..574881b70 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -76,14 +76,18 @@ class Channel9IE(InfoExtractor): </div>)? # File size part may be missing ''' # Extract known formats - formats = [{'url': x.group('url'), - 'format_id': x.group('quality'), - 'format_note': x.group('note'), - 'format': '%s (%s)' % (x.group('quality'), x.group('note')), - 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate - } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats] - # Sort according to known formats list - formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id'])) + formats = [{ + 'url': x.group('url'), + 'format_id': x.group('quality'), + 'format_note': x.group('note'), + 'format': u'%s (%s)' % (x.group('quality'), x.group('note')), + 'filesize': self._restore_bytes(x.group('filesize')), # File size is approximate + 'preference': self._known_formats.index(x.group('quality')), + 'vcodec': 'none' if x.group('note') == 'Audio only' else None, + } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats] + + self._sort_formats(formats) + return formats def _extract_title(self, html): diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py new file mode 100644 index 000000000..88e0e9aba --- /dev/null +++ b/youtube_dl/extractor/cmt.py @@ -0,0 +1,19 @@ +from .mtv import MTVIE + +class CMTIE(MTVIE): + IE_NAME = u'cmt.com' + _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml' + _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' + + _TESTS = [ + { + u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', + u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2', + u'info_dict': { + u'id': u'989124', + u'ext': u'mp4', + u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"', + u'description': u'Blame It All On My Roots', + }, + }, + ] diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index a034bb2fb..ecac5e0e9 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -1,7 +1,10 @@ import re from .common import InfoExtractor -from ..utils import determine_ext +from ..utils import ( + int_or_none, + 
parse_duration, +) class CNNIE(InfoExtractor): @@ -15,6 +18,8 @@ class CNNIE(InfoExtractor): u'info_dict': { u'title': u'Nadal wins 8th French Open title', u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', + u'duration': 135, + u'upload_date': u'20130609', }, }, { @@ -35,22 +40,58 @@ class CNNIE(InfoExtractor): info = self._download_xml(info_url, page_title) formats = [] + rex = re.compile(r'''(?x) + (?P<width>[0-9]+)x(?P<height>[0-9]+) + (?:_(?P<bitrate>[0-9]+)k)? + ''') for f in info.findall('files/file'): - mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate']) - if mf is not None: - formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text)) - formats = sorted(formats) - (_,_,_, video_path) = formats[-1] - video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path + video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip()) + fdct = { + 'format_id': f.attrib['bitrate'], + 'url': video_url, + } + + mf = rex.match(f.attrib['bitrate']) + if mf: + fdct['width'] = int(mf.group('width')) + fdct['height'] = int(mf.group('height')) + fdct['tbr'] = int_or_none(mf.group('bitrate')) + else: + mf = rex.search(f.text) + if mf: + fdct['width'] = int(mf.group('width')) + fdct['height'] = int(mf.group('height')) + fdct['tbr'] = int_or_none(mf.group('bitrate')) + else: + mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate']) + if mi: + if mi.group(1) == 'audio': + fdct['vcodec'] = 'none' + fdct['ext'] = 'm4a' + else: + fdct['tbr'] = int(mi.group(1)) + + formats.append(fdct) + + self._sort_formats(formats) thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')]) thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails] - return {'id': info.attrib['id'], - 'title': info.find('headline').text, - 'url': video_url, - 'ext': determine_ext(video_url), - 'thumbnail': thumbnails[-1][1], - 'thumbnails': thumbs_dict, - 'description': info.find('description').text, - } + metas_el = info.find('metas') + upload_date = ( + metas_el.attrib.get('version') if metas_el is not None else None) + + duration_el = info.find('length') + duration = parse_duration(duration_el.text) + + return { + 'id': info.attrib['id'], + 'title': info.find('headline').text, + 'formats': formats, + 'thumbnail': thumbnails[-1][1], + 'thumbnails': thumbs_dict, + 'description': info.find('description').text, + 'duration': duration, + 'upload_date': upload_date, + } diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index a54ce3ee7..27bd8256e 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -12,7 +12,9 @@ from ..utils import ( class ComedyCentralIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)' + _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/ + (video-clips|episodes|cc-studios|video-collections) + /(?P<title>.*)''' _FEED_URL = u'http://comedycentral.com/feeds/mrss/' _TEST = { diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6fa60622e..f498bcf6f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -51,7 +51,8 @@ class InfoExtractor(object): Calculated from the format_id, width, height. and format_note fields if missing. * format_id A short description of the format - ("mp4_h264_opus" or "19") + ("mp4_h264_opus" or "19"). 
+ Technically optional, but strongly recommended. * format_note Additional info about the format ("3D" or "DASH video") * width Width of the video, if known @@ -68,7 +69,8 @@ class InfoExtractor(object): download, lower-case. "http", "https", "rtsp", "rtmp" or so. * preference Order number of this format. If this field is - present, the formats get sorted by this field. + present and not None, the formats get sorted + by this field. -1 for default (order by other properties), -2 or smaller for less than default. url: Final video URL. @@ -376,7 +378,7 @@ class InfoExtractor(object): @staticmethod def _og_regexes(prop): content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')' - property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop) + property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop) template = r'<meta[^>]+?%s[^>]+?%s' return [ template % (property_re, content_re), diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 416e25156..0b11d1f10 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -10,11 +10,11 @@ from ..utils import ( class DreiSatIE(InfoExtractor): IE_NAME = '3sat' - _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' + _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _TEST = { u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", - u'file': u'36983.webm', - u'md5': u'57c97d0469d71cf874f6815aa2b7c944', + u'file': u'36983.mp4', + u'md5': u'9dcfe344732808dbfcc901537973c922', u'info_dict': { u"title": u"Kaffeeland Schweiz", u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...", diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7a14c98f9..377ae91c4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -162,6 +162,8 @@ class GenericIE(InfoExtractor): return self.url_result('http://' + url) video_id = os.path.splitext(url.split('/')[-1])[0] + self.to_screen(u'%s: Requesting header' % video_id) + try: response = self._send_head(url) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index e5332cce8..16926b4d3 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor): 'description': descr, 'thumbnail': format_info['slate'], } + +class ImdbListIE(InfoExtractor): + IE_NAME = u'imdb:list' + IE_DESC = u'Internet Movie Database lists' + _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + list_id = mobj.group('id') + + # RSS XML is sometimes malformed + rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, u'Downloading list RSS') + list_title = self._html_search_regex(r'<title>(.*?)', rss, u'list title') + + # Export is independent of actual author_id, but returns 404 if no author_id is provided. + # However, passing dummy author_id seems to be enough. 
+ csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id, + list_id, u'Downloading list CSV') + + entries = [] + for item in csv.split('\n')[1:]: + cols = item.split(',') + if len(cols) < 2: + continue + item_id = cols[1][1:-1] + if item_id.startswith('vi'): + entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb')) + + return self.playlist_result(entries, list_id, list_title) \ No newline at end of file diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 16a6f73c8..4ddda2f1b 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -5,7 +5,6 @@ from ..utils import ( compat_urlparse, compat_urllib_parse, xpath_with_ns, - determine_ext, ) @@ -63,13 +62,17 @@ class InternetVideoArchiveIE(InfoExtractor): for content in item.findall(_bp('media:group/media:content')): attr = content.attrib f_url = attr['url'] + width = int(attr['width']) + bitrate = int(attr['bitrate']) + format_id = '%d-%dk' % (width, bitrate) formats.append({ + 'format_id': format_id, 'url': f_url, - 'ext': determine_ext(f_url), - 'width': int(attr['width']), - 'bitrate': int(attr['bitrate']), + 'width': width, + 'tbr': bitrate, }) - formats = sorted(formats, key=lambda f: f['bitrate']) + + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 4bdf55f93..98d1d272a 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -84,14 +84,16 @@ class IviIE(InfoExtractor): result = video_json[u'result'] - formats = [{'url': x[u'url'], - 'format_id': x[u'content_format'] - } for x in result[u'files'] if x[u'content_format'] in self._known_formats] - formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id'])) - - if len(formats) == 0: - self._downloader.report_warning(u'No media links available for %s' % video_id) - return + formats = [{ + 'url': x[u'url'], + 'format_id': x[u'content_format'], + 'preference': self._known_formats.index(x[u'content_format']), + } for x in result[u'files'] if x[u'content_format'] in self._known_formats] + + self._sort_formats(formats) + + if not formats: + raise ExtractorError(u'No media links available for %s' % video_id) duration = result[u'duration'] compilation = result[u'compilation'] diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py new file mode 100644 index 000000000..aad782578 --- /dev/null +++ b/youtube_dl/extractor/jpopsukitv.py @@ -0,0 +1,73 @@ +# coding=utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + unified_strdate, +) + + +class JpopsukiIE(InfoExtractor): + IE_NAME = 'jpopsuki.tv' + _VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/video/(.*?)/(?P\S+)' + + _TEST = { + 'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771', + 'md5': '88018c0c1a9b1387940e90ec9e7e198e', + 'file': '00be659d23b0b40508169cdee4545771.mp4', + 'info_dict': { + 'id': '00be659d23b0b40508169cdee4545771', + 'title': 'ayumi hamasaki - evolution', + 'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution', + 'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg', + 'uploader': 'plama_chan', + 'uploader_id': '404', + 'upload_date': '20121101' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id 
= mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + video_url = 'http://www.jpopsuki.tv' + self._html_search_regex( + r'from: uploaded: (.*?)', webpage, 'video upload_date', + fatal=False) + if upload_date is not None: + upload_date = unified_strdate(upload_date) + view_count_str = self._html_search_regex( + r'
  • Hits: ([0-9]+?)
  • ', webpage, 'video view_count', + fatal=False) + comment_count_str = self._html_search_regex( + r'

    ([0-9]+?) comments

    ', webpage, 'video comment_count', + fatal=False) + + return { + 'id': video_id, + 'url': video_url, + 'title': video_title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'upload_date': upload_date, + 'view_count': int_or_none(view_count_str), + 'comment_count': int_or_none(comment_count_str), + } diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index dd59aa3e6..592ed747a 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json @@ -6,17 +8,17 @@ from ..utils import ExtractorError class LyndaIE(InfoExtractor): - IE_NAME = u'lynda' - IE_DESC = u'lynda.com videos' + IE_NAME = 'lynda' + IE_DESC = 'lynda.com videos' _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html' _TEST = { - u'url': u'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', - u'file': u'114408.mp4', - u'md5': u'ecfc6862da89489161fb9cd5f5a6fac1', + 'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', + 'file': '114408.mp4', + 'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', u"info_dict": { - u'title': u'Using the exercise files', - u'duration': 68 + 'title': 'Using the exercise files', + 'duration': 68 } } @@ -25,26 +27,26 @@ class LyndaIE(InfoExtractor): video_id = mobj.group(1) page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, - video_id, u'Downloading video JSON') + video_id, 'Downloading video JSON') video_json = json.loads(page) - if u'Status' in video_json and video_json[u'Status'] == u'NotFound': - raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) + if 'Status' in video_json and video_json['Status'] == 'NotFound': + raise ExtractorError('Video %s does not exist' % video_id, expected=True) - if video_json[u'HasAccess'] is False: - raise ExtractorError(u'Video %s is only available for members' % video_id, expected=True) + if video_json['HasAccess'] is False: + raise ExtractorError('Video %s is only available for members' % video_id, expected=True) - video_id = video_json[u'ID'] - duration = video_json[u'DurationInSeconds'] - title = video_json[u'Title'] + video_id = video_json['ID'] + duration = video_json['DurationInSeconds'] + title = video_json['Title'] - formats = [{'url': fmt[u'Url'], - 'ext': fmt[u'Extension'], - 'width': fmt[u'Width'], - 'height': fmt[u'Height'], - 'filesize': fmt[u'FileSize'], - 'format_id': fmt[u'Resolution'] - } for fmt in video_json[u'Formats']] + formats = [{'url': fmt['Url'], + 'ext': fmt['Extension'], + 'width': fmt['Width'], + 'height': fmt['Height'], + 'filesize': fmt['FileSize'], + 'format_id': fmt['Resolution'] + } for fmt in video_json['Formats']] self._sort_formats(formats) @@ -57,8 +59,8 @@ class LyndaIE(InfoExtractor): class LyndaCourseIE(InfoExtractor): - IE_NAME = u'lynda:course' - IE_DESC = u'lynda.com online courses' + IE_NAME = 'lynda:course' + IE_DESC = 'lynda.com online courses' # Course link equals to welcome/introduction video link of same course # We will recognize it as course link @@ -70,27 +72,31 @@ class LyndaCourseIE(InfoExtractor): course_id = mobj.group('courseid') page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, - course_id, u'Downloading course JSON') + course_id, 'Downloading course JSON') course_json = json.loads(page) - if u'Status' in course_json and 
course_json[u'Status'] == u'NotFound': - raise ExtractorError(u'Course %s does not exist' % course_id, expected=True) + if 'Status' in course_json and course_json['Status'] == 'NotFound': + raise ExtractorError('Course %s does not exist' % course_id, expected=True) unaccessible_videos = 0 videos = [] - for chapter in course_json[u'Chapters']: - for video in chapter[u'Videos']: - if video[u'HasAccess'] is not True: + for chapter in course_json['Chapters']: + for video in chapter['Videos']: + if video['HasAccess'] is not True: unaccessible_videos += 1 continue - videos.append(video[u'ID']) + videos.append(video['ID']) if unaccessible_videos > 0: - self._downloader.report_warning(u'%s videos are only available for members and will not be downloaded' % unaccessible_videos) + self._downloader.report_warning('%s videos are only available for members and will not be downloaded' % unaccessible_videos) - entries = [self.url_result('http://www.lynda.com/%s/%s-4.html' % (course_path, video_id), 'Lynda') for video_id in videos] + entries = [ + self.url_result('http://www.lynda.com/%s/%s-4.html' % + (course_path, video_id), + 'Lynda') + for video_id in videos] - course_title = course_json[u'Title'] + course_title = course_json['Title'] - return self.playlist_result(entries, course_id, course_title) \ No newline at end of file + return self.playlist_result(entries, course_id, course_title) diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py new file mode 100644 index 000000000..b818cf50c --- /dev/null +++ b/youtube_dl/extractor/macgamestore.py @@ -0,0 +1,43 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class MacGameStoreIE(InfoExtractor): + IE_NAME = 'macgamestore' + IE_DESC = 'MacGameStore trailers' + _VALID_URL = r'https?://www\.macgamestore\.com/mediaviewer\.php\?trailer=(?P\d+)' + + _TEST = { + 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', + 'file': '2450.m4v', + 'md5': '8649b8ea684b6666b4c5be736ecddc61', + 'info_dict': { + 'title': 'Crow', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id, 'Downloading trailer page') + + if re.search(r'>Missing Media<', webpage) is not None: + raise ExtractorError('Trailer %s does not exist' % video_id, expected=True) + + video_title = self._html_search_regex( + r'MacGameStore: (.*?) 
Trailer', webpage, 'title') + + video_url = self._html_search_regex( + r'(?s)', + webpage, 'video URL') + + return { + 'id': video_id, + 'url': video_url, + 'title': video_title + } diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 125d81551..7c54ea0f4 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -53,7 +53,7 @@ class MixcloudIE(InfoExtractor): info = json.loads(json_data) preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url') - song_url = preview_url.replace('/previews/', '/cloudcasts/originals/') + song_url = preview_url.replace('/previews/', '/c/originals/') template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) final_song_url = self._get_url(template_url) if final_song_url is None: diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index ed11f521a..f1cf41e2d 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -129,7 +129,7 @@ class MTVIE(MTVServicesInfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') - uri = mobj.group('mgid') + uri = mobj.groupdict().get('mgid') if uri is None: webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index e22ff9c38..951e977bd 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor): (?!sets/)(?P[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) - |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*) + |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*) ) ''' IE_NAME = u'soundcloud' @@ -193,7 +193,7 @@ class SoundcloudIE(InfoExtractor): if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID full_title = track_id - elif mobj.group('widget'): + elif mobj.group('player'): query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) return self.url_result(query['url'][0], ie='Soundcloud') else: diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index cec65261b..23172143e 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -55,15 +55,21 @@ class ThePlatformIE(InfoExtractor): formats = [] for f in switch.findall(_x('smil:video')): attr = f.attrib + width = int(attr['width']) + height = int(attr['height']) + vbr = int(attr['system-bitrate']) // 1000 + format_id = '%dx%d_%dk' % (width, height, vbr) formats.append({ + 'format_id': format_id, 'url': base_url, 'play_path': 'mp4:' + attr['src'], 'ext': 'flv', - 'width': int(attr['width']), - 'height': int(attr['height']), - 'vbr': int(attr['system-bitrate']), + 'width': width, + 'height': height, + 'vbr': vbr, }) - formats.sort(key=lambda f: (f['height'], f['width'], f['vbr'])) + + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 584550455..bc31c2e64 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -44,6 +44,7 @@ class WistiaIE(InfoExtractor): 'height': a['height'], 'filesize': a['size'], 'ext': a['ext'], + 'preference': 1 if atype == 'original' else None, }) self._sort_formats(formats) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index bd0f2cae0..77ad423c4 100644 --- 
a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -1,5 +1,4 @@ import json -import os import re import sys @@ -16,6 +15,7 @@ from ..aes import ( aes_decrypt_text ) + class YouPornIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))' _TEST = { @@ -23,9 +23,9 @@ class YouPornIE(InfoExtractor): u'file': u'505835.mp4', u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89', u'info_dict': { - u"upload_date": u"20101221", - u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", - u"uploader": u"Ask Dan And Jennifer", + u"upload_date": u"20101221", + u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", + u"uploader": u"Ask Dan And Jennifer", u"title": u"Sex Ed: Is It Safe To Masturbate Daily?", u"age_limit": 18, } @@ -71,38 +71,36 @@ class YouPornIE(InfoExtractor): link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8') links.append(link) - if not links: - raise ExtractorError(u'ERROR: no known formats available for video') - formats = [] for link in links: - # A link looks like this: # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0 # A path looks like this: # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4 video_url = unescapeHTML(link) path = compat_urllib_parse_urlparse(video_url).path - extension = os.path.splitext(path)[1][1:] - format = path.split('/')[4].split('_')[:2] + format_parts = path.split('/')[4].split('_')[:2] - # size = format[0] - # bitrate = format[1] - format = "-".join(format) - # title = u'%s-%s-%s' % (video_title, size, bitrate) + dn = compat_urllib_parse_urlparse(video_url).netloc.partition('.')[0] + + resolution = format_parts[0] + height = int(resolution[:-len('p')]) + bitrate = int(format_parts[1][:-len('k')]) + format = u'-'.join(format_parts) + u'-' + dn formats.append({ 'url': video_url, - 'ext': extension, 'format': format, 'format_id': format, + 'height': height, + 'tbr': bitrate, + 'resolution': resolution, }) - # Sort and remove doubles - formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-')))) - for i in range(len(formats)-1,0,-1): - if formats[i]['format_id'] == formats[i-1]['format_id']: - del formats[i] + self._sort_formats(formats) + + if not formats: + raise ExtractorError(u'ERROR: no known formats available for video') return { 'id': video_id, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b0e29c2a8..9424d5e26 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -194,6 +194,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40}, '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40}, + '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40}, # Dash mp4 audio '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, diff --git a/youtube_dl/utils.py 
b/youtube_dl/utils.py
index 4c8bdbb0c..fc10fba63 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -539,7 +539,8 @@ def formatSeconds(secs):
     else:
         return '%d' % secs
 
-def make_HTTPS_handler(opts_no_check_certificate):
+
+def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
     if sys.version_info < (3, 2):
         import httplib
 
@@ -560,7 +561,7 @@ def make_HTTPS_handler(opts_no_check_certificate):
         class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
             def https_open(self, req):
                 return self.do_open(HTTPSConnectionV3, req)
-        return HTTPSHandlerV3()
+        return HTTPSHandlerV3(**kwargs)
     else:
         context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
         context.verify_mode = (ssl.CERT_NONE
@@ -571,7 +572,7 @@ def make_HTTPS_handler(opts_no_check_certificate):
             context.load_default_certs()
         except AttributeError:
             pass # Python < 3.4
-        return compat_urllib_request.HTTPSHandler(context=context)
+        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 
 class ExtractorError(Exception):
     """Error during info extraction."""
@@ -1102,3 +1103,24 @@ class HEADRequest(compat_urllib_request.Request):
 
 def int_or_none(v):
     return v if v is None else int(v)
+
+
+def parse_duration(s):
+    if s is None:
+        return None
+
+    m = re.match(
+        r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s)
+    if not m:
+        return None
+    res = int(m.group('secs'))
+    if m.group('mins'):
+        res += int(m.group('mins')) * 60
+    if m.group('hours'):
+        res += int(m.group('hours')) * 60 * 60
+    return res
+
+
+def prepend_extension(filename, ext):
+    name, real_ext = os.path.splitext(filename)
+    return u'{0}.{1}{2}'.format(name, ext, real_ext)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index b3d015634..bf5fc8212 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
 
-__version__ = '2013.12.23.4'
+__version__ = '2014.01.03'