projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Allow to select videos to download by their upload dates (related #137)
[youtube-dl]
/
youtube_dl
/
InfoExtractors.py
diff --git
a/youtube_dl/InfoExtractors.py
b/youtube_dl/InfoExtractors.py
index 208b44887545ee1bba04e598787e6c574d5bd5cf..936af9cb4bf32cedace3e90b4809a1ce507f2898 100755
(executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@
-114,8
+114,8
@@
class InfoExtractor(object):
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
""" Returns the response handle """
if note is None:
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
""" Returns the response handle """
if note is None:
- note = u'Downloading video webpage'
- if note is not False:
+ self.report_download_webpage(video_id)
+ elif note is not False:
self.to_screen(u'%s: %s' % (video_id, note))
try:
return compat_urllib_request.urlopen(url_or_request)
self.to_screen(u'%s: %s' % (video_id, note))
try:
return compat_urllib_request.urlopen(url_or_request)
@@
-152,6
+152,10
@@
class InfoExtractor(object):
"""Report information extraction."""
self.to_screen(u'%s: Extracting information' % id_or_name)
"""Report information extraction."""
self.to_screen(u'%s: Extracting information' % id_or_name)
+ def report_download_webpage(self, video_id):
+ """Report webpage download."""
+ self.to_screen(u'%s: Downloading webpage' % video_id)
+
def report_age_confirmation(self):
"""Report attempt to confirm age."""
self.to_screen(u'Confirming age')
def report_age_confirmation(self):
"""Report attempt to confirm age."""
self.to_screen(u'Confirming age')
@@
-570,7
+574,11
@@
class YoutubeIE(InfoExtractor):
if video_description:
video_description = clean_html(video_description)
else:
if video_description:
video_description = clean_html(video_description)
else:
- video_description = u''
+ fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
+ if fd_mobj:
+ video_description = unescapeHTML(fd_mobj.group(1))
+ else:
+ video_description = u''
# subtitles
video_subtitles = None
# subtitles
video_subtitles = None
@@
-680,17
+688,10
@@
class MetacafeIE(InfoExtractor):
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
IE_NAME = u'metacafe'
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
IE_NAME = u'metacafe'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_disclaimer(self):
"""Report disclaimer retrieval."""
self.to_screen(u'Retrieving disclaimer')
def report_disclaimer(self):
"""Report disclaimer retrieval."""
self.to_screen(u'Retrieving disclaimer')
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self.to_screen(u'%s: Downloading webpage' % video_id)
-
def _real_initialize(self):
# Retrieve disclaimer
request = compat_urllib_request.Request(self._DISCLAIMER)
def _real_initialize(self):
# Retrieve disclaimer
request = compat_urllib_request.Request(self._DISCLAIMER)
@@
-791,9
+792,6
@@
class DailymotionIE(InfoExtractor):
IE_NAME = u'dailymotion'
_WORKING = False
IE_NAME = u'dailymotion'
_WORKING = False
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def _real_extract(self, url):
# Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url)
def _real_extract(self, url):
# Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url)
@@
-875,13
+873,6
@@
class PhotobucketIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
IE_NAME = u'photobucket'
_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
IE_NAME = u'photobucket'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self.to_screen(u'%s: Downloading webpage' % video_id)
-
def _real_extract(self, url):
# Extract id from URL
mobj = re.match(self._VALID_URL, url)
def _real_extract(self, url):
# Extract id from URL
mobj = re.match(self._VALID_URL, url)
@@
-940,13
+931,6
@@
class YahooIE(InfoExtractor):
_VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
IE_NAME = u'video.yahoo'
_VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
IE_NAME = u'video.yahoo'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self.to_screen(u'%s: Downloading webpage' % video_id)
-
def _real_extract(self, url, new_video=True):
# Extract ID from URL
mobj = re.match(self._VALID_URL, url)
def _real_extract(self, url, new_video=True):
# Extract ID from URL
mobj = re.match(self._VALID_URL, url)
@@
-1076,13
+1060,6
@@
class VimeoIE(InfoExtractor):
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
IE_NAME = u'vimeo'
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
IE_NAME = u'vimeo'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self.to_screen(u'%s: Downloading webpage' % video_id)
-
def _real_extract(self, url, new_video=True):
# Extract ID from URL
mobj = re.match(self._VALID_URL, url)
def _real_extract(self, url, new_video=True):
# Extract ID from URL
mobj = re.match(self._VALID_URL, url)
@@
-1116,7
+1093,10
@@
class VimeoIE(InfoExtractor):
config = webpage.split(' = {config:')[1].split(',assets:')[0]
config = json.loads(config)
except:
config = webpage.split(' = {config:')[1].split(',assets:')[0]
config = json.loads(config)
except:
- self._downloader.report_error(u'unable to extract info section')
+ if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
+ self._downloader.report_error(u'The author has restricted the access to this video, try with the "--referer" option')
+ else:
+ self._downloader.report_error(u'unable to extract info section')
return
# Extract title
return
# Extract title
@@
-1193,13
+1173,6
@@
class ArteTvIE(InfoExtractor):
IE_NAME = u'arte.tv'
IE_NAME = u'arte.tv'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self.to_screen(u'%s: Downloading webpage' % video_id)
-
def fetch_webpage(self, url):
request = compat_urllib_request.Request(url)
try:
def fetch_webpage(self, url):
request = compat_urllib_request.Request(url)
try:
@@
-1323,14
+1296,11
@@
class GenericIE(InfoExtractor):
_VALID_URL = r'.*'
IE_NAME = u'generic'
_VALID_URL = r'.*'
IE_NAME = u'generic'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_download_webpage(self, video_id):
"""Report webpage download."""
if not self._downloader.params.get('test', False):
self._downloader.report_warning(u'Falling back on generic information extractor.')
def report_download_webpage(self, video_id):
"""Report webpage download."""
if not self._downloader.params.get('test', False):
self._downloader.report_warning(u'Falling back on generic information extractor.')
- self.to_screen(u'%s: Downloading webpage' % video_id)
+ super(GenericIE, self).report_download_webpage(video_id)
def report_following_redirect(self, new_url):
"""Report information extraction."""
def report_following_redirect(self, new_url):
"""Report information extraction."""
@@
-1465,9
+1435,6
@@
class YoutubeSearchIE(InfoExtractor):
_max_youtube_results = 1000
IE_NAME = u'youtube:search'
_max_youtube_results = 1000
IE_NAME = u'youtube:search'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_download_page(self, query, pagenum):
"""Report attempt to download search page with given number."""
query = query.decode(preferredencoding())
def report_download_page(self, query, pagenum):
"""Report attempt to download search page with given number."""
query = query.decode(preferredencoding())
@@
-1542,9
+1509,6
@@
class GoogleSearchIE(InfoExtractor):
_max_google_results = 1000
IE_NAME = u'video.google:search'
_max_google_results = 1000
IE_NAME = u'video.google:search'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_download_page(self, query, pagenum):
"""Report attempt to download playlist page with given number."""
query = query.decode(preferredencoding())
def report_download_page(self, query, pagenum):
"""Report attempt to download playlist page with given number."""
query = query.decode(preferredencoding())
@@
-1626,9
+1590,6
@@
class YahooSearchIE(InfoExtractor):
_max_yahoo_results = 1000
IE_NAME = u'video.yahoo:search'
_max_yahoo_results = 1000
IE_NAME = u'video.yahoo:search'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_download_page(self, query, pagenum):
"""Report attempt to download playlist page with given number."""
query = query.decode(preferredencoding())
def report_download_page(self, query, pagenum):
"""Report attempt to download playlist page with given number."""
query = query.decode(preferredencoding())
@@
-1722,9
+1683,6
@@
class YoutubePlaylistIE(InfoExtractor):
_MAX_RESULTS = 50
IE_NAME = u'youtube:playlist'
_MAX_RESULTS = 50
IE_NAME = u'youtube:playlist'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
@classmethod
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
@classmethod
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
@@
-1765,12
+1723,11
@@
class YoutubePlaylistIE(InfoExtractor):
if 'feed' not in response:
self._downloader.report_error(u'Got a malformed response from YouTube API')
return
if 'feed' not in response:
self._downloader.report_error(u'Got a malformed response from YouTube API')
return
+ playlist_title = response['feed']['title']['$t']
if 'entry' not in response['feed']:
# Number of videos is a multiple of self._MAX_RESULTS
break
if 'entry' not in response['feed']:
# Number of videos is a multiple of self._MAX_RESULTS
break
- playlist_title = response['feed']['title']['$t']
-
videos += [ (entry['yt$position']['$t'], entry['content']['src'])
for entry in response['feed']['entry']
if 'content' in entry ]
videos += [ (entry['yt$position']['$t'], entry['content']['src'])
for entry in response['feed']['entry']
if 'content' in entry ]
@@
-1869,9
+1826,6
@@
class YoutubeUserIE(InfoExtractor):
_VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
IE_NAME = u'youtube:user'
_VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
IE_NAME = u'youtube:user'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_download_page(self, username, start_index):
"""Report attempt to download user page."""
self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
def report_download_page(self, username, start_index):
"""Report attempt to download user page."""
self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
@@
-1938,9
+1892,6
@@
class BlipTVUserIE(InfoExtractor):
_PAGE_SIZE = 12
IE_NAME = u'blip.tv:user'
_PAGE_SIZE = 12
IE_NAME = u'blip.tv:user'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_download_page(self, username, pagenum):
"""Report attempt to download user page."""
self.to_screen(u'user %s: Downloading video ids from page %d' %
def report_download_page(self, username, pagenum):
"""Report attempt to download user page."""
self.to_screen(u'user %s: Downloading video ids from page %d' %
@@
-2016,10
+1967,6
@@
class DepositFilesIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
_VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
- def report_download_webpage(self, file_id):
- """Report webpage download."""
- self.to_screen(u'%s: Downloading webpage' % file_id)
-
def _real_extract(self, url):
file_id = url.split('/')[-1]
# Rebuild url in english locale
def _real_extract(self, url):
file_id = url.split('/')[-1]
# Rebuild url in english locale
@@
-2270,9
+2217,6
@@
class MyVideoIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo'
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
@@
-2708,9
+2652,6
@@
class SoundcloudIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
IE_NAME = u'soundcloud'
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
IE_NAME = u'soundcloud'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_resolve(self, video_id):
"""Report information extraction."""
self.to_screen(u'%s: Resolving id' % video_id)
def report_resolve(self, video_id):
"""Report information extraction."""
self.to_screen(u'%s: Resolving id' % video_id)
@@
-2777,9
+2718,6
@@
class SoundcloudSetIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
IE_NAME = u'soundcloud'
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
IE_NAME = u'soundcloud'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_resolve(self, video_id):
"""Report information extraction."""
self.to_screen(u'%s: Resolving id' % video_id)
def report_resolve(self, video_id):
"""Report information extraction."""
self.to_screen(u'%s: Resolving id' % video_id)
@@
-2857,7
+2795,7
@@
class InfoQIE(InfoExtractor):
self.report_extraction(url)
# Extract video URL
self.report_extraction(url)
# Extract video URL
- mobj = re.search(r"jsclassref='([^']*)'", webpage)
+ mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
if mobj is None:
self._downloader.report_error(u'unable to extract video url')
return
if mobj is None:
self._downloader.report_error(u'unable to extract video url')
return
@@
-2900,9
+2838,6
@@
class MixcloudIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
IE_NAME = u'mixcloud'
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
IE_NAME = u'mixcloud'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_download_json(self, file_id):
"""Report JSON download."""
self.to_screen(u'Downloading json')
def report_download_json(self, file_id):
"""Report JSON download."""
self.to_screen(u'Downloading json')
@@
-3010,10
+2945,6
@@
class StanfordOpenClassroomIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
IE_NAME = u'stanfordoc'
_VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
IE_NAME = u'stanfordoc'
- def report_download_webpage(self, objid):
- """Report information extraction."""
- self.to_screen(u'%s: Downloading webpage' % objid)
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
@@
-3192,10
+3123,6
@@
class MTVIE(InfoExtractor):
class YoukuIE(InfoExtractor):
_VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
class YoukuIE(InfoExtractor):
_VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
- def report_download_webpage(self, file_id):
- """Report webpage download."""
- self.to_screen(u'%s: Downloading webpage' % file_id)
-
def _gen_sid(self):
nowTime = int(time.time() * 1000)
random1 = random.randint(1000,1998)
def _gen_sid(self):
nowTime = int(time.time() * 1000)
random1 = random.randint(1000,1998)
@@
-3305,10
+3232,6
@@
class XNXXIE(InfoExtractor):
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'
- def report_webpage(self, video_id):
- """Report information extraction"""
- self.to_screen(u'%s: Downloading webpage' % video_id)
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
@@
-3316,7
+3239,7
@@
class XNXXIE(InfoExtractor):
return
video_id = mobj.group(1)
return
video_id = mobj.group(1)
- self.report_webpage(video_id)
+ self.report_download_webpage(video_id)
# Get webpage content
try:
# Get webpage content
try:
@@
-3362,9
+3285,6
@@
class GooglePlusIE(InfoExtractor):
_VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
IE_NAME = u'plus.google'
_VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
IE_NAME = u'plus.google'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_extract_entry(self, url):
"""Report downloading extry"""
self.to_screen(u'Downloading entry: %s' % url)
def report_extract_entry(self, url):
"""Report downloading extry"""
self.to_screen(u'Downloading entry: %s' % url)
@@
-3640,6
+3560,7
@@
class FunnyOrDieIE(InfoExtractor):
class SteamIE(InfoExtractor):
_VALID_URL = r"""http://store.steampowered.com/
class SteamIE(InfoExtractor):
_VALID_URL = r"""http://store.steampowered.com/
+ (agecheck/)?
(?P<urltype>video|app)/ #If the page is only for videos or for a game
(?P<gameID>\d+)/?
(?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
(?P<urltype>video|app)/ #If the page is only for videos or for a game
(?P<gameID>\d+)/?
(?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID