From: Philipp Hagemeister Date: Wed, 4 Dec 2013 18:56:05 +0000 (+0100) Subject: Merge remote-tracking branch 'dstftw/correct-valid-urls' X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=29030c0a4c2f4dded5a310add940aae0791f9d73;hp=c0ade33e167d1668c4aa8a6684e7083e6c71dd6e;p=youtube-dl Merge remote-tracking branch 'dstftw/correct-valid-urls' --- diff --git a/README.md b/README.md index 031e436b6..029c418d1 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,8 @@ which means you can modify it, redistribute it or use it however you like. --list-extractors List all supported extractors and the URLs they would handle --extractor-descriptions Output descriptions of all supported extractors - --proxy URL Use the specified HTTP/HTTPS proxy + --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an + empty string (--proxy "") for direct connection --no-check-certificate Suppress HTTPS certificate validation. --cache-dir DIR Location in the filesystem where youtube-dl can store downloaded information permanently. By @@ -55,7 +56,7 @@ which means you can modify it, redistribute it or use it however you like. --dateafter DATE download only videos uploaded after this date --no-playlist download only the currently playing video --age-limit YEARS download only videos suitable for the given age - --download-archive FILE Download only videos not present in the archive + --download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it. @@ -183,7 +184,7 @@ which means you can modify it, redistribute it or use it however you like. # CONFIGURATION -You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. +You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\\youtube-dl.conf`. # OUTPUT TEMPLATE diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 2eeef2ae9..d2446b670 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -81,11 +81,11 @@ from .PostProcessor import ( def parseOpts(overrideArguments=None): - def _readOptions(filename_bytes): + def _readOptions(filename_bytes, default=[]): try: optionf = open(filename_bytes) except IOError: - return [] # silently skip if file is not present + return default # silently skip if file is not present try: res = [] for l in optionf: @@ -191,7 +191,9 @@ def parseOpts(overrideArguments=None): general.add_option('--extractor-descriptions', action='store_true', dest='list_extractor_descriptions', help='Output descriptions of all supported extractors', default=False) - general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') + general.add_option( + '--proxy', dest='proxy', default=None, metavar='URL', + help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', @@ -224,7 +226,7 @@ def parseOpts(overrideArguments=None): default=None, type=int) selection.add_option('--download-archive', metavar='FILE', dest='download_archive', - help='Download only videos not present in the archive file. Record the IDs of all downloaded videos in it.') + help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.') authentication.add_option('-u', '--username', @@ -419,6 +421,8 @@ def parseOpts(overrideArguments=None): if opts.verbose: write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') else: + systemConf = _readOptions('/etc/youtube-dl.conf') + xdg_config_home = os.environ.get('XDG_CONFIG_HOME') if xdg_config_home: userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config') @@ -428,8 +432,31 @@ def parseOpts(overrideArguments=None): userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') if not os.path.isfile(userConfFile): userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') - systemConf = _readOptions('/etc/youtube-dl.conf') - userConf = _readOptions(userConfFile) + userConf = _readOptions(userConfFile, None) + + if userConf is None: + appdata_dir = os.environ.get('appdata') + if appdata_dir: + userConf = _readOptions( + os.path.join(appdata_dir, 'youtube-dl', 'config'), + default=None) + if userConf is None: + userConf = _readOptions( + os.path.join(appdata_dir, 'youtube-dl', 'config.txt'), + default=None) + + if userConf is None: + userConf = _readOptions( + os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'), + default=None) + if userConf is None: + userConf = _readOptions( + os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'), + default=None) + + if userConf is None: + userConf = [] + commandLineConf = sys.argv[1:] argv = systemConf + userConf + commandLineConf opts, args = parser.parse_args(argv) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 5b522552a..a527f10de 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor): }) formats = sorted(formats, key=lambda f: (f['height'], f['width'])) - info = { + playlist.append({ '_type': 'video', 'id': video_id, 'title': title, @@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor): 'upload_date': upload_date, 'uploader_id': uploader_id, 'user_agent': 'QuickTime compatible (youtube-dl)', - } - # TODO: Remove when #980 has been merged - info['url'] = formats[-1]['url'] - info['ext'] = formats[-1]['ext'] - - playlist.append(info) + }) return { '_type': 'playlist', diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index a8394bfb0..8bb546410 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor): for f in formats: f['ext'] = determine_ext(f['url']) - info = { + return { '_type': 'video', 'id': video_id, 'title': title, @@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor): 'description': description, 'uploader': uploader, 'upload_date': upload_date, + 'thumbnail': data.get('misc', {}).get('image'), } - thumbnail = data.get('misc', {}).get('image') - if thumbnail: - info['thumbnail'] = thumbnail - - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - - return info diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index caea446ea..a54ce3ee7 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -1,7 +1,7 @@ import re from .common import InfoExtractor -from .mtv import MTVIE, _media_xml_tag +from .mtv import MTVServicesInfoExtractor from ..utils import ( compat_str, compat_urllib_parse, @@ -11,8 +11,8 @@ from ..utils import ( ) -class ComedyCentralIE(MTVIE): - _VALID_URL = r'https?://(?:www\.)?comedycentral\.com/(video-clips|episodes|cc-studios)/(?P.*)' +class ComedyCentralIE(MTVServicesInfoExtractor): + _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)' _FEED_URL = u'http://comedycentral.com/feeds/mrss/' _TEST = { @@ -25,12 +25,6 @@ class ComedyCentralIE(MTVIE): u'description': u'After a certain point, breastfeeding becomes c**kblocking.', }, } - # Overwrite MTVIE properties we don't want - _TESTS = [] - - def _get_thumbnail_url(self, uri, itemdoc): - search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) - return itemdoc.find(search_path).attrib['url'] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -197,7 +191,7 @@ class ComedyCentralShowsIE(InfoExtractor): }) effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1) - info = { + results.append({ 'id': shortMediaId, 'formats': formats, 'uploader': showId, @@ -205,11 +199,6 @@ class ComedyCentralShowsIE(InfoExtractor): 'title': effTitle, 'thumbnail': None, 'description': compat_str(officialTitle), - } - - # TODO: Remove when #980 has been merged - info.update(info['formats'][-1]) - - results.append(info) + }) return results diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 3d1dcb793..d418ce4a8 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -28,7 +28,8 @@ class DaumIE(InfoExtractor): video_id = mobj.group(1) canonical_url = 'http://tvpot.daum.net/v/%s' % video_id webpage = self._download_webpage(canonical_url, video_id) - full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"', + full_id = self._search_regex( + r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]', webpage, u'full id') query = compat_urllib_parse.urlencode({'vid': full_id}) info = self._download_xml( @@ -56,7 +57,7 @@ class DaumIE(InfoExtractor): 'format_id': profile, }) - info = { + return { 'id': video_id, 'title': info.find('TITLE').text, 'formats': formats, @@ -65,6 +66,3 @@ class DaumIE(InfoExtractor): 'duration': int(info.find('DURATION').text), 'upload_date': info.find('REGDTTM').text[:8], } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 008c99699..cb7226f82 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -65,7 +65,7 @@ class DreiSatIE(InfoExtractor): return (qidx, prefer_http, format['video_bitrate']) formats.sort(key=_sortkey) - info = { + return { '_type': 'video', 'id': video_id, 'title': video_title, @@ -76,8 +76,3 @@ class DreiSatIE(InfoExtractor): 'uploader': video_uploader, 'upload_date': upload_date, } - - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - - return info diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py index 615674baf..c6ab6952e 100644 --- a/youtube_dl/extractor/faz.py +++ b/youtube_dl/extractor/faz.py @@ -44,13 +44,10 @@ class FazIE(InfoExtractor): }) descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description') - info = { + return { 'id': video_id, 'title': self._og_search_title(webpage), 'formats': formats, 'description': descr, 'thumbnail': config.find('STILL/STILL_BIG').text, } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 9645b00c3..26b7d2ae5 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor): 'format_id': q, }) - info = { + return { 'id': data_video['guid'], 'title': compat_urllib_parse.unquote(data_video['title']), 'formats': formats, 'description': get_meta_content('description', webpage), 'thumbnail': self._og_search_thumbnail(webpage), } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py index 88f656031..d82a5d4b2 100644 --- a/youtube_dl/extractor/gametrailers.py +++ b/youtube_dl/extractor/gametrailers.py @@ -1,12 +1,9 @@ import re -from .mtv import MTVIE, _media_xml_tag +from .mtv import MTVServicesInfoExtractor -class GametrailersIE(MTVIE): - """ - Gametrailers use the same videos system as MTVIE, it just changes the feed - url, where the uri is and the method to get the thumbnails. - """ + +class GametrailersIE(MTVServicesInfoExtractor): _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' _TEST = { u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', @@ -17,15 +14,9 @@ class GametrailersIE(MTVIE): u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!', }, } - # Overwrite MTVIE properties we don't want - _TESTS = [] _FEED_URL = 'http://www.gametrailers.com/feeds/mrss' - def _get_thumbnail_url(self, uri, itemdoc): - search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) - return itemdoc.find(search_path).attrib['url'] - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py index 449138b56..6b95b4998 100644 --- a/youtube_dl/extractor/metacritic.py +++ b/youtube_dl/extractor/metacritic.py @@ -43,13 +43,10 @@ class MetacriticIE(InfoExtractor): description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>', webpage, u'description', flags=re.DOTALL) - info = { + return { 'id': video_id, 'title': clip.find('title').text, 'formats': formats, 'description': description, 'duration': int(clip.find('duration').text), } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 42aee58be..6b3feb560 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -10,35 +10,8 @@ from ..utils import ( def _media_xml_tag(tag): return '{http://search.yahoo.com/mrss/}%s' % tag -class MTVIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$' - - _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/' - - _TESTS = [ - { - u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', - u'file': u'853555.mp4', - u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8', - u'info_dict': { - u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"', - u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', - }, - }, - { - u'add_ie': ['Vevo'], - u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', - u'file': u'USCJY1331283.mp4', - u'md5': u'73b4e7fcadd88929292fe52c3ced8caf', - u'info_dict': { - u'title': u'Everything Has Changed', - u'upload_date': u'20130606', - u'uploader': u'Taylor Swift', - }, - u'skip': u'VEVO is only available in some countries', - }, - ] +class MTVServicesInfoExtractor(InfoExtractor): @staticmethod def _id_from_uri(uri): return uri.split(':')[-1] @@ -53,7 +26,12 @@ class MTVIE(InfoExtractor): return base + m.group('finalid') def _get_thumbnail_url(self, uri, itemdoc): - return 'http://mtv.mtvnimages.com/uri/' + uri + search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) + thumb_node = itemdoc.find(search_path) + if thumb_node is None: + return None + else: + return thumb_node.attrib['url'] def _extract_video_formats(self, metadataXml): if '/error_country_block.swf' in metadataXml: @@ -93,7 +71,7 @@ class MTVIE(InfoExtractor): else: description = None - info = { + return { 'title': itemdoc.find('title').text, 'formats': self._extract_video_formats(mediagen_page), 'id': video_id, @@ -101,11 +79,6 @@ class MTVIE(InfoExtractor): 'description': description, } - # TODO: Remove when #980 has been merged - info.update(info['formats'][-1]) - - return info - def _get_videos_info(self, uri): video_id = self._id_from_uri(uri) data = compat_urllib_parse.urlencode({'uri': uri}) @@ -113,6 +86,39 @@ class MTVIE(InfoExtractor): u'Downloading info') return [self._get_video_info(item) for item in idoc.findall('.//item')] + +class MTVIE(MTVServicesInfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$' + + _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/' + + _TESTS = [ + { + u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', + u'file': u'853555.mp4', + u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8', + u'info_dict': { + u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"', + u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', + }, + }, + { + u'add_ie': ['Vevo'], + u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', + u'file': u'USCJY1331283.mp4', + u'md5': u'73b4e7fcadd88929292fe52c3ced8caf', + u'info_dict': { + u'title': u'Everything Has Changed', + u'upload_date': u'20130606', + u'uploader': u'Taylor Swift', + }, + u'skip': u'VEVO is only available in some countries', + }, + ] + + def _get_thumbnail_url(self, uri, itemdoc): + return 'http://mtv.mtvnimages.com/uri/' + uri + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index d290397c7..c012ec0cf 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -56,7 +56,7 @@ class NaverIE(InfoExtractor): 'height': int(format_el.find('height').text), }) - info = { + return { 'id': video_id, 'title': info.find('Subject').text, 'formats': formats, @@ -65,6 +65,3 @@ class NaverIE(InfoExtractor): 'upload_date': info.find('WriteDate').text.replace('.', ''), 'view_count': int(info.find('PlayCount').text), } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 3bbda128e..c2254ae8a 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -30,7 +30,7 @@ class RedTubeIE(InfoExtractor): r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL') video_title = self._html_search_regex( - r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>', + r'<h1 class="videoTitle[^"]*">(.+?)</h1>', webpage, u'title') # No self-labeling, but they describe themselves as diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py index a711531e6..fd90cc5dd 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southparkstudios.py @@ -1,15 +1,14 @@ import re -from .mtv import MTVIE, _media_xml_tag +from .mtv import MTVServicesInfoExtractor -class SouthParkStudiosIE(MTVIE): +class SouthParkStudiosIE(MTVServicesInfoExtractor): IE_NAME = u'southparkstudios.com' _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' - # Overwrite MTVIE properties we don't want _TESTS = [{ u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', @@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE): }, }] - def _get_thumbnail_url(self, uri, itemdoc): - search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) - thumb_node = itemdoc.find(search_path) - if thumb_node is None: - return None - else: - return thumb_node.attrib['url'] - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) url = u'http://www.' + mobj.group(u'url') diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py index 1c49e580d..d64aaa41f 100644 --- a/youtube_dl/extractor/trilulilu.py +++ b/youtube_dl/extractor/trilulilu.py @@ -55,7 +55,7 @@ class TriluliluIE(InfoExtractor): for fnode in format_doc.findall('./formats/format') ] - info = { + return { '_type': 'video', 'id': video_id, 'formats': formats, @@ -64,7 +64,3 @@ class TriluliluIE(InfoExtractor): 'thumbnail': thumbnail, } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - - return info diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index 36d1bde08..138a35b2a 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -47,7 +47,7 @@ class ViddlerIE(InfoExtractor): r"thumbnail\s*:\s*'([^']*)'", webpage, u'thumbnail', fatal=False) - info = { + return { '_type': 'video', 'id': video_id, 'title': title, @@ -56,9 +56,3 @@ class ViddlerIE(InfoExtractor): 'duration': duration, 'formats': formats, } - - # TODO: Remove when #980 has been merged - info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url']) - info.update(info['formats'][-1]) - - return info diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 7444d3393..279f75e7a 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -26,7 +26,7 @@ class XHamsterIE(InfoExtractor): { u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', u'file': u'2221348.flv', - u'md5': u'e767b9475de189320f691f49c679c4c7', + u'md5': u'970a94178ca4118c5aa3aaea21211b81', u'info_dict': { u"upload_date": u"20130914", u"uploader_id": u"jojo747400", diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 765b4a9bf..7fff761bd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -336,7 +336,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): u"uploader": u"Philipp Hagemeister", u"uploader_id": u"phihag", u"upload_date": u"20121002", - u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." + u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." } }, { @@ -1366,6 +1366,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # description video_description = get_element_by_id("eow-description", video_webpage) if video_description: + video_description = re.sub(r'''(?x) + <a\s+ + (?:[a-zA-Z-]+="[^"]+"\s+)*? + title="([^"]+)"\s+ + (?:[a-zA-Z-]+="[^"]+"\s+)*? + class="yt-uix-redirect-link"\s*> + [^<]+ + </a> + ''', r'\1', video_description) video_description = clean_html(video_description) else: fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage) @@ -1765,6 +1774,7 @@ class YoutubeSearchIE(SearchInfoExtractor): return self.playlist_result(videos, query) class YoutubeSearchDateIE(YoutubeSearchIE): + IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' _SEARCH_KEY = 'ytsearchdate' IE_DESC = u'YouTube.com searches, newest videos first' diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d8f341ab9..68b30bfd4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.12.02' +__version__ = '2013.12.04'