X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fgeneric.py;h=d48c84f8d575111dd0459056efd33e7338b1a1df;hb=f5e54a1fda6fcc4ef279e54ff6cf63f6eae71bb0;hp=759fd60a73931c177e876b5102c2bd95fc0c60ea;hpb=de282fc217e481bb3cce6158a09551658759fe01;p=youtube-dl
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 759fd60a7..d48c84f8d 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1,3 +1,5 @@
+# encoding: utf-8
+
import os
import re
@@ -6,15 +8,28 @@ from ..utils import (
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
+ compat_urlparse,
ExtractorError,
)
+from .brightcove import BrightcoveIE
-class GenericIE(InfoExtractor):
- """Generic last-resort information extractor."""
+class GenericIE(InfoExtractor):
+ IE_DESC = u'Generic downloader that works on some sites'
_VALID_URL = r'.*'
IE_NAME = u'generic'
+ _TESTS = [
+ {
+ u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
+ u'file': u'13601338388002.mp4',
+ u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
+ u'info_dict': {
+ u"uploader": u"www.hodiho.fr",
+ u"title": u"R\u00e9gis plante sa Jeep"
+ }
+ },
+ ]
def report_download_webpage(self, video_id):
"""Report webpage download."""
@@ -83,8 +98,18 @@ class GenericIE(InfoExtractor):
return new_url
def _real_extract(self, url):
- new_url = self._test_redirect(url)
- if new_url: return [self.url_result(new_url)]
+ parsed_url = compat_urlparse.urlparse(url)
+ if not parsed_url.scheme:
+ self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
+ return self.url_result('http://' + url)
+
+ try:
+ new_url = self._test_redirect(url)
+ if new_url:
+ return [self.url_result(new_url)]
+ except compat_urllib_error.HTTPError:
+ # This may be a stupid server that doesn't like HEAD, our UA, or so
+ pass
video_id = url.split('/')[-1]
try:
@@ -92,9 +117,16 @@ class GenericIE(InfoExtractor):
except ValueError:
# since this is the last-resort InfoExtractor, if
# this error is thrown, it'll be thrown here
- raise ExtractorError(u'Invalid URL: %s' % url)
+ raise ExtractorError(u'Failed to download URL: %s' % url)
self.report_extraction(video_id)
+ # Look for BrightCove:
+ m_brightcove = re.search(r'', webpage, re.DOTALL)
+ if m_brightcove is not None:
+ self.to_screen(u'Brightcove video detected.')
+ bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
+ return self.url_result(bc_url, 'Brightcove')
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
@@ -114,15 +146,19 @@ class GenericIE(InfoExtractor):
if m_video_type is not None:
mobj = re.search(r'.*?(.*)',
- webpage, u'video title', default=u'video')
+ webpage, u'video title', default=u'video', flags=re.DOTALL)
# video uploader is domain name
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',