From: Philipp Hagemeister
Date: Wed, 4 Dec 2013 18:56:05 +0000 (+0100)
Subject: Merge remote-tracking branch 'dstftw/correct-valid-urls'
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=29030c0a4c2f4dded5a310add940aae0791f9d73;hp=c0ade33e167d1668c4aa8a6684e7083e6c71dd6e;p=youtube-dl
Merge remote-tracking branch 'dstftw/correct-valid-urls'
---
diff --git a/README.md b/README.md
index 031e436b6..029c418d1 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,8 @@ which means you can modify it, redistribute it or use it however you like.
--list-extractors List all supported extractors and the URLs they
would handle
--extractor-descriptions Output descriptions of all supported extractors
- --proxy URL Use the specified HTTP/HTTPS proxy
+ --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an
+ empty string (--proxy "") for direct connection
--no-check-certificate Suppress HTTPS certificate validation.
--cache-dir DIR Location in the filesystem where youtube-dl can
store downloaded information permanently. By
@@ -55,7 +56,7 @@ which means you can modify it, redistribute it or use it however you like.
--dateafter DATE download only videos uploaded after this date
--no-playlist download only the currently playing video
--age-limit YEARS download only videos suitable for the given age
- --download-archive FILE Download only videos not present in the archive
+ --download-archive FILE Download only videos not listed in the archive
file. Record the IDs of all downloaded videos in
it.
@@ -183,7 +184,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION
-You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
+You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\\youtube-dl.conf`.
# OUTPUT TEMPLATE
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 2eeef2ae9..d2446b670 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -81,11 +81,11 @@ from .PostProcessor import (
def parseOpts(overrideArguments=None):
- def _readOptions(filename_bytes):
+ def _readOptions(filename_bytes, default=[]):
try:
optionf = open(filename_bytes)
except IOError:
- return [] # silently skip if file is not present
+ return default # silently skip if file is not present
try:
res = []
for l in optionf:
@@ -191,7 +191,9 @@ def parseOpts(overrideArguments=None):
general.add_option('--extractor-descriptions',
action='store_true', dest='list_extractor_descriptions',
help='Output descriptions of all supported extractors', default=False)
- general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
+ general.add_option(
+ '--proxy', dest='proxy', default=None, metavar='URL',
+ help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
@@ -224,7 +226,7 @@ def parseOpts(overrideArguments=None):
default=None, type=int)
selection.add_option('--download-archive', metavar='FILE',
dest='download_archive',
- help='Download only videos not present in the archive file. Record the IDs of all downloaded videos in it.')
+ help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
authentication.add_option('-u', '--username',
@@ -419,6 +421,8 @@ def parseOpts(overrideArguments=None):
if opts.verbose:
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
+ systemConf = _readOptions('/etc/youtube-dl.conf')
+
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
@@ -428,8 +432,31 @@ def parseOpts(overrideArguments=None):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
- systemConf = _readOptions('/etc/youtube-dl.conf')
- userConf = _readOptions(userConfFile)
+ userConf = _readOptions(userConfFile, None)
+
+ if userConf is None:
+ appdata_dir = os.environ.get('appdata')
+ if appdata_dir:
+ userConf = _readOptions(
+ os.path.join(appdata_dir, 'youtube-dl', 'config'),
+ default=None)
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
+ default=None)
+
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
+ default=None)
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
+ default=None)
+
+ if userConf is None:
+ userConf = []
+
commandLineConf = sys.argv[1:]
argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv)
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index 5b522552a..a527f10de 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor):
})
formats = sorted(formats, key=lambda f: (f['height'], f['width']))
- info = {
+ playlist.append({
'_type': 'video',
'id': video_id,
'title': title,
@@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor):
'upload_date': upload_date,
'uploader_id': uploader_id,
'user_agent': 'QuickTime compatible (youtube-dl)',
- }
- # TODO: Remove when #980 has been merged
- info['url'] = formats[-1]['url']
- info['ext'] = formats[-1]['ext']
-
- playlist.append(info)
+ })
return {
'_type': 'playlist',
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
index a8394bfb0..8bb546410 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor):
for f in formats:
f['ext'] = determine_ext(f['url'])
- info = {
+ return {
'_type': 'video',
'id': video_id,
'title': title,
@@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor):
'description': description,
'uploader': uploader,
'upload_date': upload_date,
+ 'thumbnail': data.get('misc', {}).get('image'),
}
- thumbnail = data.get('misc', {}).get('image')
- if thumbnail:
- info['thumbnail'] = thumbnail
-
- # TODO: Remove when #980 has been merged
- info.update(formats[-1])
-
- return info
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index caea446ea..a54ce3ee7 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -1,7 +1,7 @@
import re
from .common import InfoExtractor
-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor
from ..utils import (
compat_str,
compat_urllib_parse,
@@ -11,8 +11,8 @@ from ..utils import (
)
-class ComedyCentralIE(MTVIE):
- _VALID_URL = r'https?://(?:www\.)?comedycentral\.com/(video-clips|episodes|cc-studios)/(?P.*)'
+class ComedyCentralIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P.*)'
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
_TEST = {
@@ -25,12 +25,6 @@ class ComedyCentralIE(MTVIE):
u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
},
}
- # Overwrite MTVIE properties we don't want
- _TESTS = []
-
- def _get_thumbnail_url(self, uri, itemdoc):
- search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
- return itemdoc.find(search_path).attrib['url']
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -197,7 +191,7 @@ class ComedyCentralShowsIE(InfoExtractor):
})
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
- info = {
+ results.append({
'id': shortMediaId,
'formats': formats,
'uploader': showId,
@@ -205,11 +199,6 @@ class ComedyCentralShowsIE(InfoExtractor):
'title': effTitle,
'thumbnail': None,
'description': compat_str(officialTitle),
- }
-
- # TODO: Remove when #980 has been merged
- info.update(info['formats'][-1])
-
- results.append(info)
+ })
return results
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py
index 3d1dcb793..d418ce4a8 100644
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -28,7 +28,8 @@ class DaumIE(InfoExtractor):
video_id = mobj.group(1)
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
webpage = self._download_webpage(canonical_url, video_id)
- full_id = self._search_regex(r'(.*?)
', webpage, u'description')
- info = {
+ return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': descr,
'thumbnail': config.find('STILL/STILL_BIG').text,
}
- # TODO: Remove when #980 has been merged
- info.update(formats[-1])
- return info
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 9645b00c3..26b7d2ae5 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor):
'format_id': q,
})
- info = {
+ return {
'id': data_video['guid'],
'title': compat_urllib_parse.unquote(data_video['title']),
'formats': formats,
'description': get_meta_content('description', webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}
- # TODO: Remove when #980 has been merged
- info.update(formats[-1])
- return info
diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py
index 88f656031..d82a5d4b2 100644
--- a/youtube_dl/extractor/gametrailers.py
+++ b/youtube_dl/extractor/gametrailers.py
@@ -1,12 +1,9 @@
import re
-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor
-class GametrailersIE(MTVIE):
- """
- Gametrailers use the same videos system as MTVIE, it just changes the feed
- url, where the uri is and the method to get the thumbnails.
- """
+
+class GametrailersIE(MTVServicesInfoExtractor):
_VALID_URL = r'http://www\.gametrailers\.com/(?Pvideos|reviews|full-episodes)/(?P.*?)/(?P.*)'
_TEST = {
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
@@ -17,15 +14,9 @@ class GametrailersIE(MTVIE):
u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
},
}
- # Overwrite MTVIE properties we don't want
- _TESTS = []
_FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
- def _get_thumbnail_url(self, uri, itemdoc):
- search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
- return itemdoc.find(search_path).attrib['url']
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py
index 449138b56..6b95b4998 100644
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@@ -43,13 +43,10 @@ class MetacriticIE(InfoExtractor):
description = self._html_search_regex(r'Description:(.*?)',
webpage, u'description', flags=re.DOTALL)
- info = {
+ return {
'id': video_id,
'title': clip.find('title').text,
'formats': formats,
'description': description,
'duration': int(clip.find('duration').text),
}
- # TODO: Remove when #980 has been merged
- info.update(formats[-1])
- return info
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 42aee58be..6b3feb560 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -10,35 +10,8 @@ from ..utils import (
def _media_xml_tag(tag):
return '{http://search.yahoo.com/mrss/}%s' % tag
-class MTVIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P[0-9]+)/[^/]+$'
-
- _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
-
- _TESTS = [
- {
- u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
- u'file': u'853555.mp4',
- u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
- u'info_dict': {
- u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
- u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
- },
- },
- {
- u'add_ie': ['Vevo'],
- u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
- u'file': u'USCJY1331283.mp4',
- u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
- u'info_dict': {
- u'title': u'Everything Has Changed',
- u'upload_date': u'20130606',
- u'uploader': u'Taylor Swift',
- },
- u'skip': u'VEVO is only available in some countries',
- },
- ]
+class MTVServicesInfoExtractor(InfoExtractor):
@staticmethod
def _id_from_uri(uri):
return uri.split(':')[-1]
@@ -53,7 +26,12 @@ class MTVIE(InfoExtractor):
return base + m.group('finalid')
def _get_thumbnail_url(self, uri, itemdoc):
- return 'http://mtv.mtvnimages.com/uri/' + uri
+ search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+ thumb_node = itemdoc.find(search_path)
+ if thumb_node is None:
+ return None
+ else:
+ return thumb_node.attrib['url']
def _extract_video_formats(self, metadataXml):
if '/error_country_block.swf' in metadataXml:
@@ -93,7 +71,7 @@ class MTVIE(InfoExtractor):
else:
description = None
- info = {
+ return {
'title': itemdoc.find('title').text,
'formats': self._extract_video_formats(mediagen_page),
'id': video_id,
@@ -101,11 +79,6 @@ class MTVIE(InfoExtractor):
'description': description,
}
- # TODO: Remove when #980 has been merged
- info.update(info['formats'][-1])
-
- return info
-
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
data = compat_urllib_parse.urlencode({'uri': uri})
@@ -113,6 +86,39 @@ class MTVIE(InfoExtractor):
u'Downloading info')
return [self._get_video_info(item) for item in idoc.findall('.//item')]
+
+class MTVIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P[0-9]+)/[^/]+$'
+
+ _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
+
+ _TESTS = [
+ {
+ u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
+ u'file': u'853555.mp4',
+ u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
+ u'info_dict': {
+ u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
+ u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
+ },
+ },
+ {
+ u'add_ie': ['Vevo'],
+ u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
+ u'file': u'USCJY1331283.mp4',
+ u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
+ u'info_dict': {
+ u'title': u'Everything Has Changed',
+ u'upload_date': u'20130606',
+ u'uploader': u'Taylor Swift',
+ },
+ u'skip': u'VEVO is only available in some countries',
+ },
+ ]
+
+ def _get_thumbnail_url(self, uri, itemdoc):
+ return 'http://mtv.mtvnimages.com/uri/' + uri
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index d290397c7..c012ec0cf 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -56,7 +56,7 @@ class NaverIE(InfoExtractor):
'height': int(format_el.find('height').text),
})
- info = {
+ return {
'id': video_id,
'title': info.find('Subject').text,
'formats': formats,
@@ -65,6 +65,3 @@ class NaverIE(InfoExtractor):
'upload_date': info.find('WriteDate').text.replace('.', ''),
'view_count': int(info.find('PlayCount').text),
}
- # TODO: Remove when #980 has been merged
- info.update(formats[-1])
- return info
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index 3bbda128e..c2254ae8a 100644
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -30,7 +30,7 @@ class RedTubeIE(InfoExtractor):
r'