projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Merge pull request #1231 from yasoob/master
[youtube-dl]
/
youtube_dl
/
extractor
/
generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index be9d29a6362f3bd51299507dc1c21a79a90abe94..8488dca05293f2779619ae59a7b4e17f1c856290 100644
(file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -7,12 +7,14 @@
from .common import InfoExtractor
from ..utils import (
compat_urllib_error,
compat_urllib_parse,
from ..utils import (
compat_urllib_error,
compat_urllib_parse,
+ compat_urllib_parse_urlparse,
compat_urllib_request,
ExtractorError,
)
from .brightcove import BrightcoveIE
compat_urllib_request,
ExtractorError,
)
from .brightcove import BrightcoveIE
+
class GenericIE(InfoExtractor):
IE_DESC = u'Generic downloader that works on some sites'
_VALID_URL = r'.*'
class GenericIE(InfoExtractor):
IE_DESC = u'Generic downloader that works on some sites'
_VALID_URL = r'.*'
@@ -23,7 +25,7 @@
class GenericIE(InfoExtractor):
u'file': u'13601338388002.mp4',
u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
u'info_dict': {
u'file': u'13601338388002.mp4',
u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
u'info_dict': {
- u"uploader": u"www.hodiho.fr",
+ u"uploader": u"www.hodiho.fr",
u"title": u"R\u00e9gis plante sa Jeep"
}
},
u"title": u"R\u00e9gis plante sa Jeep"
}
},
@@ -107,8 +109,13 @@
class GenericIE(InfoExtractor):
return new_url
def _real_extract(self, url):
return new_url
def _real_extract(self, url):
- new_url = self._test_redirect(url)
- if new_url: return [self.url_result(new_url)]
+ try:
+ new_url = self._test_redirect(url)
+ if new_url:
+ return [self.url_result(new_url)]
+ except compat_urllib_error.HTTPError:
+ # This may be a stupid server that doesn't like HEAD, our UA, or so
+ pass
video_id = url.split('/')[-1]
try:
video_id = url.split('/')[-1]
try:
@@ -120,7 +127,7 @@
class GenericIE(InfoExtractor):
self.report_extraction(video_id)
# Look for BrigthCove:
self.report_extraction(video_id)
# Look for BrigthCove:
- m_brightcove = re.search(r'<object.+?class="BrightcoveExperience".+?</object>', webpage, re.DOTALL)
+ m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
if m_brightcove is not None:
self.to_screen(u'Brightcove video detected.')
bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
if m_brightcove is not None:
self.to_screen(u'Brightcove video detected.')
bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
@@ -144,6 +151,9 @@
class GenericIE(InfoExtractor):
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None:
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None:
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
+ if mobj is None:
+ # HTML5 video
+ mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
@@ -153,6 +163,10 @@
class GenericIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
video_url = compat_urllib_parse.unquote(mobj.group(1))
raise ExtractorError(u'Invalid URL: %s' % url)
video_url = compat_urllib_parse.unquote(mobj.group(1))
+ if video_url.startswith('//'):
+ video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
+ if '://' not in video_url:
+ video_url = url + ('' if url.endswith('/') else '/') + video_url
video_id = os.path.basename(video_url)
# here's a fun little line of code for you:
video_id = os.path.basename(video_url)
# here's a fun little line of code for you: