# -*- coding: utf-8 -*-
import datetime
-import HTMLParser
-import httplib
import netrc
import os
import re
import socket
import time
-import urllib
-import urllib2
import email.utils
import xml.etree.ElementTree
import random
import math
from urlparse import parse_qs
-try:
- import cStringIO as StringIO
-except ImportError:
- import StringIO
-
from utils import *
"""Information Extractor class.
Information extractors are the classes that, given a URL, extract
- information from the video (or videos) the URL refers to. This
- information includes the real video URL, the video title and simplified
- title, author and others. The information is stored in a dictionary
- which is then passed to the FileDownloader. The FileDownloader
- processes this information possibly downloading the video to the file
- system, among other possible outcomes. The dictionaries must include
- the following fields:
-
- id: Video identifier.
- url: Final video URL.
- uploader: Nickname of the video uploader.
- title: Literal title.
- ext: Video filename extension.
- format: Video format.
- player_url: SWF Player URL (may be None).
-
- The following fields are optional. Their primary purpose is to allow
- youtube-dl to serve as the backend for a video search function, such
- as the one in youtube2mp3. They are only used when their respective
- forced printing functions are called:
-
- thumbnail: Full URL to a video thumbnail image.
- description: One-line video description.
+ information about the video (or videos) the URL refers to. This
+ information includes the real video URL, the video title, author and
+ others. The information is stored in a dictionary which is then
+ passed to the FileDownloader. The FileDownloader processes this
+ information possibly downloading the video to the file system, among
+ other possible outcomes.
+
+ The dictionaries must include the following fields:
+
+ id: Video identifier.
+ url: Final video URL.
+ uploader: Nickname of the video uploader, unescaped.
+ upload_date: Video upload date (YYYYMMDD).
+ title: Video title, unescaped.
+ ext: Video filename extension.
+
+ The following fields are optional:
+
+ format: The video format, defaults to ext (used for --get-format)
+ thumbnail: Full URL to a video thumbnail image.
+ description: One-line video description.
+ player_url: SWF Player URL (used for rtmpdump).
+ subtitles: The .srt file contents.
+ urlhandle: [internal] The urlHandle to be used to download the file,
+ like returned by urllib.request.urlopen
+
+ The fields should all be Unicode strings.
Subclasses of this one should re-define the _real_initialize() and
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
+
+ _real_extract() must return a *list* of information dictionaries as
+ described above.
+
+ Finally, the _WORKING attribute should be set to False for broken IEs
+ in order to warn the users and skip the tests.
"""
_ready = False
_downloader = None
+ _WORKING = True
def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader."""
"""Receives a URL and returns True if suitable for this IE."""
return re.match(self._VALID_URL, url) is not None
+ def working(self):
+ """Getter method for _WORKING."""
+ return self._WORKING
+
def initialize(self):
"""Initializes an instance (authentication, etc)."""
if not self._ready:
password = info[2]
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
- except (IOError, netrc.NetrcParseError), err:
+ except (IOError, netrc.NetrcParseError) as err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
return
# Set language
- request = urllib2.Request(self._LANG_URL)
+ request = compat_urllib_request.Request(self._LANG_URL)
try:
self.report_lang()
- urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
return
'username': username,
'password': password,
}
- request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
+ request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
try:
self.report_login()
- login_results = urllib2.urlopen(request).read()
+ login_results = compat_urllib_request.urlopen(request).read()
if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
return
'next_url': '/',
'action_confirm': 'Confirm',
}
- request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
+ request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
try:
self.report_age_confirmation()
- age_results = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ age_results = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
return
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
mobj = re.search(self._NEXT_URL_RE, url)
if mobj:
- url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/')
+ url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
# Extract video id from URL
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
# Get video webpage
self.report_video_webpage_download(video_id)
- request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
+ request = compat_urllib_request.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
try:
- video_webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ video_webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type))
- request = urllib2.Request(video_info_url)
+ request = compat_urllib_request.Request(video_info_url)
try:
- video_info_webpage = urllib2.urlopen(request).read()
+ video_info_webpage = compat_urllib_request.urlopen(request).read()
video_info = parse_qs(video_info_webpage)
if 'token' in video_info:
break
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
if 'token' not in video_info:
if 'author' not in video_info:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
- video_uploader = urllib.unquote_plus(video_info['author'][0])
+ video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
# title
if 'title' not in video_info:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
- video_title = urllib.unquote_plus(video_info['title'][0])
+ video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
video_title = video_title.decode('utf-8')
# thumbnail image
self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
video_thumbnail = ''
else: # don't panic if we can't find it
- video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
+ video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
# upload date
- upload_date = u'NA'
+ upload_date = None
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
if mobj is not None:
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
if self._downloader.params.get('writesubtitles', False):
try:
self.report_video_subtitles_download(video_id)
- request = urllib2.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
+ request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
try:
- srt_list = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ srt_list = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
srt_lang = srt_lang_list.keys()[0]
if not srt_lang in srt_lang_list:
raise Trouble(u'WARNING: no closed captions found in the specified language')
- request = urllib2.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
+ request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
try:
- srt_xml = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ srt_xml = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
if not srt_xml:
raise Trouble(u'WARNING: unable to download video subtitles')
self._downloader.trouble(u'WARNING: unable to extract video duration')
video_duration = ''
else:
- video_duration = urllib.unquote_plus(video_info['length_seconds'][0])
+ video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
# token
- video_token = urllib.unquote_plus(video_info['token'][0])
+ video_token = compat_urllib_parse.unquote_plus(video_info['token'][0])
# Decide which formats to download
req_format = self._downloader.params.get('format', None)
# Extension
video_extension = self._video_extensions.get(format_param, 'flv')
+ video_format = '{} - {}'.format(format_param.decode('utf-8') if format_param else video_extension.decode('utf-8'),
+ self._video_dimensions.get(format_param, '???'))
+
results.append({
'id': video_id.decode('utf-8'),
'url': video_real_url.decode('utf-8'),
'upload_date': upload_date,
'title': video_title,
'ext': video_extension.decode('utf-8'),
- 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
+ 'format': video_format,
'thumbnail': video_thumbnail.decode('utf-8'),
'description': video_description,
'player_url': player_url,
def _real_initialize(self):
# Retrieve disclaimer
- request = urllib2.Request(self._DISCLAIMER)
+ request = compat_urllib_request.Request(self._DISCLAIMER)
try:
self.report_disclaimer()
- disclaimer = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ disclaimer = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err))
return
'filters': '0',
'submit': "Continue - I'm over 18",
}
- request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
+ request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
try:
self.report_age_confirmation()
- disclaimer = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ disclaimer = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
return
return
# Retrieve video webpage to extract further information
- request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
+ request = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
try:
self.report_download_webpage(video_id)
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
return
self.report_extraction(video_id)
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
if mobj is not None:
- mediaURL = urllib.unquote(mobj.group(1))
+ mediaURL = compat_urllib_parse.unquote(mobj.group(1))
video_extension = mediaURL[-3:]
# Extract gdaKey if available
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'),
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
- 'format': u'NA',
- 'player_url': None,
}]
video_extension = 'mp4'
# Retrieve video webpage to extract further information
- request = urllib2.Request(url)
+ request = compat_urllib_request.Request(url)
request.add_header('Cookie', 'family_filter=off')
try:
self.report_download_webpage(video_id)
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
return
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract media URL')
return
- flashvars = urllib.unquote(mobj.group(1))
+ flashvars = compat_urllib_parse.unquote(mobj.group(1))
for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
if key in flashvars:
self._downloader.trouble(u'ERROR: unable to extract video URL')
return
- video_url = urllib.unquote(mobj.group(1)).replace('\\/', '/')
+ video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/')
# TODO: support choosing qualities
return
video_title = unescapeHTML(mobj.group('title').decode('utf-8'))
- video_uploader = u'NA'
+ video_uploader = None
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
if mobj is None:
# lookin for official user
else:
video_uploader = mobj.group(1)
- video_upload_date = u'NA'
+ video_upload_date = None
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
if mobj is not None:
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
'upload_date': video_upload_date,
'title': video_title,
'ext': video_extension.decode('utf-8'),
- 'format': u'NA',
- 'player_url': None,
}]
video_extension = 'mp4'
# Retrieve video webpage to extract further information
- request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
+ request = compat_urllib_request.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
try:
self.report_download_webpage(video_id)
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract media URL')
return
- mediaURL = urllib.unquote(mobj.group(1))
+ mediaURL = compat_urllib_parse.unquote(mobj.group(1))
mediaURL = mediaURL.replace('\\x3d', '\x3d')
mediaURL = mediaURL.replace('\\x26', '\x26')
# Extract video thumbnail
if self._downloader.params.get('forcethumbnail', False):
- request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
+ request = compat_urllib_request.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
try:
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
return [{
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
- 'format': u'NA',
- 'player_url': None,
}]
video_extension = 'flv'
# Retrieve video webpage to extract further information
- request = urllib2.Request(url)
+ request = compat_urllib_request.Request(url)
try:
self.report_download_webpage(video_id)
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract media URL')
return
- mediaURL = urllib.unquote(mobj.group(1))
+ mediaURL = compat_urllib_parse.unquote(mobj.group(1))
video_url = mediaURL
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader,
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
- 'format': u'NA',
- 'player_url': None,
}]
# Rewrite valid but non-extractable URLs as
# extractable English language /watch/ URLs
if re.match(self._VPAGE_URL, url) is None:
- request = urllib2.Request(url)
+ request = compat_urllib_request.Request(url)
try:
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
return self._real_extract(url, new_video=False)
# Retrieve video webpage to extract further information
- request = urllib2.Request(url)
+ request = compat_urllib_request.Request(url)
try:
self.report_download_webpage(video_id)
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# seem to need most of them, otherwise the server sends a 401.
yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
yv_bitrate = '700' # according to Wikipedia this is hard-coded
- request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
+ request = compat_urllib_request.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
'&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
'&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
try:
self.report_download_webpage(video_id)
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
if mobj is None:
self._downloader.trouble(u'ERROR: Unable to extract media URL')
return
- video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
+ video_url = compat_urllib_parse.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
video_url = unescapeHTML(video_url)
return [{
'id': video_id.decode('utf-8'),
'url': video_url,
'uploader': video_uploader,
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
'thumbnail': video_thumbnail.decode('utf-8'),
'description': video_description,
- 'thumbnail': video_thumbnail,
- 'player_url': None,
}]
video_id = mobj.group(1)
# Retrieve video webpage to extract further information
- request = urllib2.Request(url, None, std_headers)
+ request = compat_urllib_request.Request(url, None, std_headers)
try:
self.report_download_webpage(video_id)
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
else: video_description = ''
# Extract upload date
- video_upload_date = u'NA'
+ video_upload_date = None
mobj = re.search(r'<span id="clip-date" style="display:none">[^:]*: (.*?)( \([^\(]*\))?</span>', webpage)
if mobj is not None:
video_upload_date = mobj.group(1)
'ext': video_extension,
'thumbnail': video_thumbnail,
'description': video_description,
- 'player_url': None,
}]
+class ArteTvIE(InfoExtractor):
+    """arte.tv information extractor.
+
+    Matches videos.arte.tv URLs under the /fr/ or /de/ sections. Pages whose
+    last path component matches _LIVE_URL are treated as live streams; every
+    other page is treated as an on-demand ("Plus7") video.
+    """
+
+    _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
+    _LIVE_URL = r'index-[0-9]+\.html$'
+
+    IE_NAME = u'arte.tv'
+
+    def __init__(self, downloader=None):
+        """Constructor. Receives an optional downloader."""
+        InfoExtractor.__init__(self, downloader)
+
+    def report_download_webpage(self, video_id):
+        """Report webpage download."""
+        self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
+
+    def report_extraction(self, video_id):
+        """Report information extraction."""
+        self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
+
+    def fetch_webpage(self, url):
+        """Download url and return the response body.
+
+        Returns None after reporting the problem through the downloader when
+        the request fails or the URL is rejected as invalid.
+        """
+        self._downloader.increment_downloads()
+        request = compat_urllib_request.Request(url)
+        try:
+            self.report_download_webpage(url)
+            webpage = compat_urllib_request.urlopen(request).read()
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+            return None
+        except ValueError:
+            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+            return None
+        return webpage
+
+    def grep_webpage(self, url, regex, regexFlags, matchTuples):
+        """Download url and pull named values out of it with one regex.
+
+        matchTuples is an iterable of (group_index, key, error_message)
+        triples: each matched group is stored under its key in the returned
+        dictionary.
+
+        Returns None when the page cannot be downloaded, when the regex does
+        not match, or when one of the requested groups did not take part in
+        the match. The problem is reported through the downloader each time.
+        """
+        page = self.fetch_webpage(url)
+        if page is None:
+            # fetch_webpage() already reported the failure; searching None
+            # would raise a TypeError.
+            return None
+
+        mobj = re.search(regex, page, regexFlags)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+            return None
+
+        info = {}
+        for (i, key, err) in matchTuples:
+            value = mobj.group(i)
+            if value is None:
+                self._downloader.trouble(err)
+                return None
+            info[key] = value
+
+        return info
+
+    def extractLiveStream(self, url):
+        """Walk the live-stream pages and locate the RTMP stream.
+
+        Nothing is returned: the assembled stream URL is not handed back to
+        the caller. Stops early if any intermediate page fails to download
+        or parse.
+        """
+        # Both indexes assume a full 'http://host/...' URL: split('/') then
+        # yields ['http:', '', host, lang, ...].
+        video_lang = url.split('/')[-4]
+        info = self.grep_webpage(
+            url,
+            r'src="(.*?/videothek_js.*?\.js)',
+            0,
+            [
+                (1, 'url', u'ERROR: Invalid URL: %s' % url)
+            ]
+        )
+        if info is None:
+            return None
+        http_host = url.split('/')[2]
+        next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
+        info = self.grep_webpage(
+            next_url,
+            r"(s_artestras_scst_geoFRDE_" + video_lang + r".*?)'.*?" +
+            r"(http://.*?\.swf).*?" +
+            r"(rtmp://.*?)'",
+            re.DOTALL,
+            [
+                (1, 'path', u'ERROR: could not extract video path: %s' % url),
+                (2, 'player', u'ERROR: could not extract video player: %s' % url),
+                (3, 'url', u'ERROR: could not extract video url: %s' % url)
+            ]
+        )
+        if info is None:
+            return None
+        # NOTE: computed but not returned; _real_extract ignores the result
+        # of this method for live pages.
+        video_url = u'%s/%s' % (info.get('url'), info.get('path'))
+
+    def extractPlus7Stream(self, url):
+        """Follow the chain of reference documents for an on-demand video.
+
+        Returns an information dictionary for the "hd" quality stream, or
+        None if any page in the chain fails to download or parse.
+        """
+        video_lang = url.split('/')[-3]
+        info = self.grep_webpage(
+            url,
+            r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
+            0,
+            [
+                (1, 'url', u'ERROR: Invalid URL: %s' % url)
+            ]
+        )
+        if info is None:
+            return None
+        next_url = compat_urllib_parse.unquote(info.get('url'))
+        info = self.grep_webpage(
+            next_url,
+            r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
+            0,
+            [
+                (1, 'url', u'ERROR: Could not find <video> tag: %s' % url)
+            ]
+        )
+        if info is None:
+            return None
+        next_url = compat_urllib_parse.unquote(info.get('url'))
+
+        info = self.grep_webpage(
+            next_url,
+            r'<video id="(.*?)".*?>.*?' +
+            r'<name>(.*?)</name>.*?' +
+            r'<dateVideo>(.*?)</dateVideo>.*?' +
+            r'<url quality="hd">(.*?)</url>',
+            re.DOTALL,
+            [
+                (1, 'id', u'ERROR: could not extract video id: %s' % url),
+                (2, 'title', u'ERROR: could not extract video title: %s' % url),
+                (3, 'date', u'ERROR: could not extract video date: %s' % url),
+                (4, 'url', u'ERROR: could not extract video url: %s' % url)
+            ]
+        )
+        if info is None:
+            return None
+
+        return {
+            'id': info.get('id'),
+            'url': compat_urllib_parse.unquote(info.get('url')),
+            'uploader': u'arte.tv',
+            'upload_date': info.get('date'),
+            'title': info.get('title'),
+            'ext': u'mp4',
+            'format': u'NA',
+            'player_url': None,
+        }
+
+    def _real_extract(self, url):
+        """Extract video information for url.
+
+        Returns a one-element list holding the information dictionary for an
+        on-demand video. Returns None for live-stream pages and when
+        extraction fails.
+        """
+        video_id = url.split('/')[-1]
+        self.report_extraction(video_id)
+
+        if re.search(self._LIVE_URL, video_id) is not None:
+            self.extractLiveStream(url)
+            return
+
+        info = self.extractPlus7Stream(url)
+        if info is None:
+            # The failure was already reported; do not return [None].
+            return
+
+        return [info]
+
+
class GenericIE(InfoExtractor):
"""Generic last-resort information extractor."""
def _test_redirect(self, url):
"""Check if it is a redirect, like url shorteners, in case restart chain."""
- class HeadRequest(urllib2.Request):
+ class HeadRequest(compat_urllib_request.Request):
def get_method(self):
return "HEAD"
- class HEADRedirectHandler(urllib2.HTTPRedirectHandler):
+ class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
"""
Subclass the HTTPRedirectHandler to make it use our
HeadRequest also on the redirected URL
origin_req_host=req.get_origin_req_host(),
unverifiable=True)
else:
- raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
+ raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
- class HTTPMethodFallback(urllib2.BaseHandler):
+ class HTTPMethodFallback(compat_urllib_request.BaseHandler):
"""
Fallback to GET if HEAD is not allowed (405 HTTP error)
"""
newheaders = dict((k,v) for k,v in req.headers.items()
if k.lower() not in ("content-length", "content-type"))
- return self.parent.open(urllib2.Request(req.get_full_url(),
+ return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
headers=newheaders,
origin_req_host=req.get_origin_req_host(),
unverifiable=True))
# Build our opener
- opener = urllib2.OpenerDirector()
- for handler in [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
+ opener = compat_urllib_request.OpenerDirector()
+ for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
HTTPMethodFallback, HEADRedirectHandler,
- urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]:
+ compat_urllib_error.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
opener.add_handler(handler())
response = opener.open(HeadRequest(url))
new_url = response.geturl()
-
- if url == new_url: return False
-
+
+ if url == new_url:
+ return False
+
self.report_following_redirect(new_url)
self._downloader.download([new_url])
return True
if self._test_redirect(url): return
video_id = url.split('/')[-1]
- request = urllib2.Request(url)
+ request = compat_urllib_request.Request(url)
try:
self.report_download_webpage(video_id)
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
- except ValueError, err:
+ except ValueError as err:
# since this is the last-resort InfoExtractor, if
# this error is thrown, it'll be thrown here
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
return
- video_url = urllib.unquote(mobj.group(1))
+ video_url = compat_urllib_parse.unquote(mobj.group(1))
video_id = os.path.basename(video_url)
# here's a fun little line of code for you:
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader,
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
- 'format': u'NA',
- 'player_url': None,
}]
return
else:
try:
- n = long(prefix)
+ n = int(prefix)
if n <= 0:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return
while (50 * pagenum) < limit:
self.report_download_page(query, pagenum+1)
- result_url = self._API_URL % (urllib.quote_plus(query), (50*pagenum)+1)
- request = urllib2.Request(result_url)
+ result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
+ request = compat_urllib_request.Request(result_url)
try:
- data = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ data = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err))
return
api_response = json.loads(data)['data']
return
else:
try:
- n = long(prefix)
+ n = int(prefix)
if n <= 0:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return
while True:
self.report_download_page(query, pagenum)
- result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum*10)
- request = urllib2.Request(result_url)
+ result_url = self._TEMPLATE_URL % (compat_urllib_parse.quote_plus(query), pagenum*10)
+ request = compat_urllib_request.Request(result_url)
try:
- page = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ page = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
return
else:
try:
- n = long(prefix)
+ n = int(prefix)
if n <= 0:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return
while True:
self.report_download_page(query, pagenum)
- result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
- request = urllib2.Request(result_url)
+ result_url = self._TEMPLATE_URL % (compat_urllib_parse.quote_plus(query), pagenum)
+ request = compat_urllib_request.Request(result_url)
try:
- page = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ page = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
while True:
self.report_download_page(playlist_id, pagenum)
url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
- request = urllib2.Request(url)
+ request = compat_urllib_request.Request(url)
try:
- page = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ page = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
while True:
self.report_download_page(channel_id, pagenum)
url = self._TEMPLATE_URL % (channel_id, pagenum)
- request = urllib2.Request(url)
+ request = compat_urllib_request.Request(url)
try:
- page = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ page = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
self.report_download_page(username, start_index)
- request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
+ request = compat_urllib_request.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
try:
- page = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ page = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
- request = urllib2.Request(url)
+ request = compat_urllib_request.Request(url)
try:
- page = urllib2.urlopen(request).read().decode('utf-8')
+ page = compat_urllib_request.urlopen(request).read().decode('utf-8')
mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1)
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
while True:
self.report_download_page(username, pagenum)
- request = urllib2.Request( page_base + "&page=" + str(pagenum) )
+ request = compat_urllib_request.Request( page_base + "&page=" + str(pagenum) )
try:
- page = urllib2.urlopen(request).read().decode('utf-8')
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ page = compat_urllib_request.urlopen(request).read().decode('utf-8')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
return
# Retrieve file webpage with 'Free download' button pressed
free_download_indication = { 'gateway_result' : '1' }
- request = urllib2.Request(url, urllib.urlencode(free_download_indication))
+ request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
try:
self.report_download_webpage(file_id)
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err))
return
return [{
'id': file_id.decode('utf-8'),
'url': file_url.decode('utf-8'),
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
'title': file_title,
'ext': file_extension.decode('utf-8'),
- 'format': u'NA',
- 'player_url': None,
}]
class FacebookIE(InfoExtractor):
"""Information Extractor for Facebook"""
+ _WORKING = False
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
_NETRC_MACHINE = 'facebook'
for piece in data.keys():
mobj = re.search(data[piece], video_webpage)
if mobj is not None:
- video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
+ video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
# Video urls
video_urls = {}
if mobj is not None:
# URL is in a Javascript segment inside an escaped Unicode format within
# the generally utf-8 page
- video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
+ video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
video_info['video_urls'] = video_urls
return video_info
password = info[2]
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
- except (IOError, netrc.NetrcParseError), err:
+ except (IOError, netrc.NetrcParseError) as err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
return
'pass': password,
'login': 'Log+In'
}
- request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
+ request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
try:
self.report_login()
- login_results = urllib2.urlopen(request).read()
+ login_results = compat_urllib_request.urlopen(request).read()
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
return
# Get video webpage
self.report_video_webpage_download(video_id)
- request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
+ request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
try:
- page = urllib2.urlopen(request)
+ page = compat_urllib_request.urlopen(request)
video_webpage = page.read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
video_thumbnail = video_info['thumbnail']
# upload date
- upload_date = u'NA'
+ upload_date = None
if 'upload_date' in video_info:
upload_time = video_info['upload_date']
timetuple = email.utils.parsedate_tz(upload_time)
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
'thumbnail': video_thumbnail.decode('utf-8'),
'description': video_description.decode('utf-8'),
- 'player_url': None,
})
return results
else:
cchar = '?'
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
- request = urllib2.Request(json_url.encode('utf-8'))
+ request = compat_urllib_request.Request(json_url.encode('utf-8'))
self.report_extraction(mobj.group(1))
info = None
try:
- urlh = urllib2.urlopen(request)
+ urlh = compat_urllib_request.urlopen(request)
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
basename = url.split('/')[-1]
title,ext = os.path.splitext(basename)
info = {
'id': title,
'url': url,
+ 'uploader': None,
+ 'upload_date': None,
'title': title,
'ext': ext,
'urlhandle': urlh
}
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
if info is None: # Regular URL
try:
json_code = urlh.read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err))
return
'description': data['description'],
'player_url': data['embedUrl']
}
- except (ValueError,KeyError), err:
+ except (ValueError,KeyError) as err:
self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
return
video_id = mobj.group(1)
# Get video webpage
- request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
+ request = compat_urllib_request.Request('http://www.myvideo.de/watch/%s' % video_id)
try:
self.report_download_webpage(video_id)
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
return [{
'id': video_id,
'url': video_url,
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
'title': video_title,
'ext': u'flv',
- 'format': u'NA',
- 'player_url': None,
}]
class ComedyCentralIE(InfoExtractor):
_VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
IE_NAME = u'comedycentral'
+ _available_formats = ['3500', '2200', '1700', '1200', '750', '400']  # format keys, largest number first; presumably bitrates -- TODO confirm
+
+ _video_extensions = {  # format key -> file extension; read by _print_formats
+ '3500': 'mp4',
+ '2200': 'mp4',
+ '1700': 'mp4',
+ '1200': 'mp4',
+ '750': 'mp4',
+ '400': 'mp4',
+ }
+ _video_dimensions = {  # format key -> 'WIDTHxHEIGHT' label; read by _print_formats
+ '3500': '1280x720',
+ '2200': '960x540',
+ '1700': '768x432',
+ '1200': '640x360',
+ '750': '512x288',
+ '400': '384x216',
+ }
+
def report_extraction(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
def report_player_url(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
+
+ def _print_formats(self, formats):  # print a table describing each format key in `formats` to stdout
+ print('Available formats:')  # header line
+ for x in formats:  # one row per key: key, extension, [dimensions]
+ print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))  # keys missing from the tables fall back to 'mp4' and '???'
+
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
else:
epTitle = mobj.group('episode')
- req = urllib2.Request(url)
+ req = compat_urllib_request.Request(url)
self.report_extraction(epTitle)
try:
- htmlHandle = urllib2.urlopen(req)
+ htmlHandle = compat_urllib_request.urlopen(req)
html = htmlHandle.read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
if dlNewest:
url = htmlHandle.geturl()
epTitle = mobj.group('episode')
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
+
if len(mMovieParams) == 0:
- self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
- return
+ # The Colbert Report embeds the information in a data-mgid
+ # attribute without a URL prefix, so extract that alternate
+ # reference and then add the URL prefix manually.
+ altMovieParams = re.findall('data-mgid="([^"]*episode.*?:.*?)"', html)
+ if len(altMovieParams) == 0:
+ self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
+ return
+ else:
+ mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
+
playerUrl_raw = mMovieParams[0][0]
self.report_player_url(epTitle)
try:
- urlHandle = urllib2.urlopen(playerUrl_raw)
+ urlHandle = compat_urllib_request.urlopen(playerUrl_raw)
playerUrl = urlHandle.geturl()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.trouble(u'ERROR: unable to find out player URL: ' + compat_str(err))
return
uri = mMovieParams[0][1]
- indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
+ indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
self.report_index_download(epTitle)
try:
- indexXml = urllib2.urlopen(indexUrl).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
+ indexXml = compat_urllib_request.urlopen(indexUrl).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.trouble(u'ERROR: unable to download episode index: ' + compat_str(err))
return
results = []
officialDate = itemEl.findall('./pubDate')[0].text
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
- urllib.urlencode({'uri': mediaId}))
- configReq = urllib2.Request(configUrl)
+ compat_urllib_parse.urlencode({'uri': mediaId}))
+ configReq = compat_urllib_request.Request(configUrl)
self.report_config_download(epTitle)
try:
- configXml = urllib2.urlopen(configReq).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
+ configXml = compat_urllib_request.urlopen(configReq).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
cdoc = xml.etree.ElementTree.fromstring(configXml)
if len(turls) == 0:
self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
continue
+
+ if self._downloader.params.get('listformats', None):
+ self._print_formats([i[0] for i in turls])
+ return
# For now, just pick the highest bitrate
format,video_url = turls[-1]
+ # Get the format arg from the arg stream
+ req_format = self._downloader.params.get('format', None)
+
+ # Select format if we can find one
+ for f,v in turls:
+ if f == req_format:
+ format, video_url = f, v
+ break
+
+ # Patch to download from alternative CDN, which does not
+ # break on current RTMPDump builds
+ broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
+ better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
+
+ if video_url.startswith(broken_cdn):
+ video_url = video_url.replace(broken_cdn, better_cdn)
+
effTitle = showId + u'-' + epTitle
info = {
'id': shortMediaId,
'format': format,
'thumbnail': None,
'description': officialTitle,
- 'player_url': playerUrl
+ 'player_url': None #playerUrl
}
results.append(info)
self.report_extraction(showName)
try:
- webPage = urllib2.urlopen(url)
+ webPage = compat_urllib_request.urlopen(url)
webPageBytes = webPage.read()
m = re.match(r'text/html; charset="?([^"]+)"?', webPage.headers['Content-Type'])
webPage = webPageBytes.decode(m.group(1) if m else 'utf-8')
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.trouble(u'ERROR: unable to download webpage: ' + compat_str(err))
return
descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
playerUrl = unescapeHTML(playerUrlMatch.group(1))
configUrlMatch = re.search('config=(.*)$', playerUrl)
- configUrl = urllib2.unquote(configUrlMatch.group(1))
+ configUrl = compat_urllib_parse.unquote(configUrlMatch.group(1))
self.report_config_download(showName)
try:
- configJSON = urllib2.urlopen(configUrl).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
+ configJSON = compat_urllib_request.urlopen(configUrl).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.trouble(u'ERROR: unable to download configuration: ' + compat_str(err))
return
# Technically, it's JavaScript, not JSON
try:
config = json.loads(configJSON)
- except (ValueError,), err:
- self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
+ except (ValueError,) as err:
+ self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + compat_str(err))
return
playlist = config['playlist']
'upload_date': None,
'title': showName,
'ext': 'flv',
- 'format': 'flv',
'thumbnail': imgUrl,
'description': description,
'player_url': playerUrl,
video_id = mobj.group('videoid')
self.report_webpage(video_id)
- request = urllib2.Request(url)
+ request = compat_urllib_request.Request(url)
try:
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
info = {
'id': video_id,
'internal_id': internal_video_id,
+ 'uploader': None,
+ 'upload_date': None,
}
self.report_extraction(video_id)
xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id
try:
- metaXml = urllib2.urlopen(xmlUrl).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ metaXml = compat_urllib_request.urlopen(xmlUrl).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err))
return
info['url'] = videoNode.findall('./file')[0].text
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
info['ext'] = info['url'].rpartition('.')[2]
- info['format'] = info['ext']
except IndexError:
self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
return
self.report_webpage(video_id)
- request = urllib2.Request(r'http://www.xvideos.com/video' + video_id)
+ request = compat_urllib_request.Request(r'http://www.xvideos.com/video' + video_id)
try:
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video url')
return
- video_url = urllib2.unquote(mobj.group(1).decode('utf-8'))
+ video_url = compat_urllib_parse.unquote(mobj.group(1).decode('utf-8'))
# Extract title
'upload_date': None,
'title': video_title,
'ext': 'flv',
- 'format': 'flv',
'thumbnail': video_thumbnail,
'description': None,
- 'player_url': None,
}
return [info]
self.report_webpage('%s/%s' % (uploader, slug_title))
- request = urllib2.Request('http://soundcloud.com/%s/%s' % (uploader, slug_title))
+ request = compat_urllib_request.Request('http://soundcloud.com/%s/%s' % (uploader, slug_title))
try:
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
if mobj:
try:
upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
- except Exception, e:
- self._downloader.to_stderr(compat_str(e))
+ except Exception as err:
+ self._downloader.to_stderr(compat_str(err))
# for soundcloud, a request to a cross domain is required for cookies
- request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
+ request = compat_urllib_request.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
return [{
'id': video_id.decode('utf-8'),
'upload_date': upload_date,
'title': title,
'ext': u'mp3',
- 'format': u'NA',
- 'player_url': None,
'description': description.decode('utf-8')
}]
self.report_webpage(url)
- request = urllib2.Request(url)
+ request = compat_urllib_request.Request(url)
try:
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video url')
return
- video_url = 'rtmpe://video.infoq.com/cfx/st/' + urllib2.unquote(mobj.group(1).decode('base64'))
+ video_url = 'rtmpe://video.infoq.com/cfx/st/' + compat_urllib_parse.unquote(mobj.group(1).decode('base64'))
# Extract title
'uploader': None,
'upload_date': None,
'title': video_title,
- 'ext': extension,
- 'format': extension, # Extension is always(?) mp4, but seems to be flv
+ 'ext': extension, # Extension is always(?) mp4, but seems to be flv
'thumbnail': None,
'description': video_description,
- 'player_url': None,
}
return [info]
"""Returns 1st active url from list"""
for url in url_list:
try:
- urllib2.urlopen(url)
+ compat_urllib_request.urlopen(url)
return url
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
url = None
return None
# construct API request
file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
# retrieve .json file with links to files
- request = urllib2.Request(file_url)
+ request = compat_urllib_request.Request(file_url)
try:
self.report_download_json(file_url)
- jsonData = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ jsonData = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err))
return
'id': file_id.decode('utf-8'),
'url': file_url.decode('utf-8'),
'uploader': uploader.decode('utf-8'),
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': json_data['name'],
'ext': file_url.split('.')[-1].decode('utf-8'),
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
video = mobj.group('video')
info = {
'id': course + '_' + video,
+ 'uploader': None,
+ 'upload_date': None,
}
self.report_extraction(info['id'])
baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
xmlUrl = baseUrl + video + '.xml'
try:
- metaXml = urllib2.urlopen(xmlUrl).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % unicode(err))
+ metaXml = compat_urllib_request.urlopen(xmlUrl).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err))
return
mdoc = xml.etree.ElementTree.fromstring(metaXml)
try:
self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
return
info['ext'] = info['url'].rpartition('.')[2]
- info['format'] = info['ext']
return [info]
elif mobj.group('course'): # A course page
course = mobj.group('course')
info = {
'id': course,
'type': 'playlist',
+ 'uploader': None,
+ 'upload_date': None,
}
self.report_download_webpage(info['id'])
try:
- coursepage = urllib2.urlopen(url).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
+ coursepage = compat_urllib_request.urlopen(url).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err))
return
m = re.search('<h1>([^<]+)</h1>', coursepage)
info = {
'id': 'Stanford OpenClassroom',
'type': 'playlist',
+ 'uploader': None,
+ 'upload_date': None,
}
self.report_download_webpage(info['id'])
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
try:
- rootpage = urllib2.urlopen(rootURL).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
+ rootpage = compat_urllib_request.urlopen(rootURL).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err))
return
info['title'] = info['id']
video_id = mobj.group('videoid')
self.report_webpage(video_id)
- request = urllib2.Request(url)
+ request = compat_urllib_request.Request(url)
try:
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
self.report_extraction(video_id)
- request = urllib2.Request(videogen_url)
+ request = compat_urllib_request.Request(videogen_url)
try:
- metadataXml = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ metadataXml = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err))
return
'id': video_id,
'url': video_url,
'uploader': performer,
+ 'upload_date': None,
'title': video_title,
'ext': ext,
'format': format,
info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
- request = urllib2.Request(info_url, None, std_headers)
+ request = compat_urllib_request.Request(info_url, None, std_headers)
try:
self.report_download_webpage(video_id)
- jsondata = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+ jsondata = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
'id': '%s_part%02d' % (video_id, index),
'url': download_url,
'uploader': None,
+ 'upload_date': None,
'title': video_title,
'ext': ext,
- 'format': u'NA'
}
files_info.append(info)
# Get webpage content
try:
- webpage = urllib2.urlopen(url).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(url).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err)
return
if result is None:
self._downloader.trouble(u'ERROR: unable to extract video url')
return
- video_url = urllib.unquote(result.group(1).decode('utf-8'))
+ video_url = compat_urllib_parse.unquote(result.group(1).decode('utf-8'))
result = re.search(self.VIDEO_TITLE_RE, webpage)
if result is None:
return
video_thumbnail = result.group(1).decode('utf-8')
- info = {'id': video_id,
- 'url': video_url,
- 'uploader': None,
- 'upload_date': None,
- 'title': video_title,
- 'ext': 'flv',
- 'format': 'flv',
- 'thumbnail': video_thumbnail,
- 'description': None,
- 'player_url': None}
-
- return [info]
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'uploader': None,
+ 'upload_date': None,
+ 'title': video_title,
+ 'ext': 'flv',
+ 'thumbnail': video_thumbnail,
+ 'description': None,
+ }]
class GooglePlusIE(InfoExtractor):
# Step 1, Retrieve post webpage to extract further information
self.report_extract_entry(post_url)
- request = urllib2.Request(post_url)
+ request = compat_urllib_request.Request(post_url)
try:
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err))
return
# Extract update date
- upload_date = u'NA'
+ upload_date = None
pattern = 'title="Timestamp">(.*?)</a>'
mobj = re.search(pattern, webpage)
if mobj:
self.report_date(upload_date)
# Extract uploader
- uploader = u'NA'
+ uploader = None
pattern = r'rel\="author".*?>(.*?)</a>'
mobj = re.search(pattern, webpage)
if mobj:
self._downloader.trouble(u'ERROR: unable to extract video page URL')
video_page = mobj.group(1)
- request = urllib2.Request(video_page)
+ request = compat_urllib_request.Request(video_page)
try:
- webpage = urllib2.urlopen(request).read()
- except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ webpage = compat_urllib_request.urlopen(request).read()
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
self.report_extract_vid_page(video_page)
'upload_date': upload_date.decode('utf-8'),
'title': video_title.decode('utf-8'),
'ext': video_extension.decode('utf-8'),
- 'format': u'NA',
- 'player_url': None,
}]