projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
f1f25be
)
[generic] Allow multiple matches for generic hits (Fixes #2818)
author
Philipp Hagemeister
<phihag@phihag.de>
Wed, 30 Apr 2014 00:23:51 +0000
(
02:23
+0200)
committer
Philipp Hagemeister
<phihag@phihag.de>
Wed, 30 Apr 2014 00:23:51 +0000
(
02:23
+0200)
youtube_dl/extractor/generic.py
patch
|
blob
|
history
diff --git
a/youtube_dl/extractor/generic.py
b/youtube_dl/extractor/generic.py
index cfb009d7954c577526d4f086fbfbe48c2034388a..58092da38e44a642efb165a0dc62ee149412a847 100644
(file)
--- a/
youtube_dl/extractor/generic.py
+++ b/
youtube_dl/extractor/generic.py
@@
-637,70
+637,77
@@
class GenericIE(InfoExtractor):
return self.url_result(smotri_url, 'Smotri')
# Start with something easy: JW Player in SWFObject
return self.url_result(smotri_url, 'Smotri')
# Start with something easy: JW Player in SWFObject
-
mobj = re.search
(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
- if
mobj is None
:
+
found = re.findall
(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
+ if
not found
:
# Look for gorilla-vid style embedding
# Look for gorilla-vid style embedding
-
mobj = re.search
(r'''(?sx)
+
found = re.findall
(r'''(?sx)
(?:
jw_plugins|
JWPlayerOptions|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
)
.*?file\s*:\s*["\'](.*?)["\']''', webpage)
(?:
jw_plugins|
JWPlayerOptions|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
)
.*?file\s*:\s*["\'](.*?)["\']''', webpage)
- if
mobj is None
:
+ if
not found
:
# Broaden the search a little bit
# Broaden the search a little bit
- mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
- if mobj is None:
- # Broaden the search a little bit: JWPlayer JS loader
- mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
-
- if mobj is None:
+ found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
+ if not found:
+ # Broaden the findall a little bit: JWPlayer JS loader
+ found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
+ if not found:
# Try to find twitter cards info
# Try to find twitter cards info
-
mobj = re.search
(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
- if
mobj is None
:
+
found = re.findall
(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
+ if
not found
:
# We look for Open Graph info:
# We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
# We look for Open Graph info:
# We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
- m_video_type = re.
search
(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+ m_video_type = re.
findall
(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None:
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None:
-
mobj = re.search
(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
- if
mobj is None
:
+
found = re.findall
(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
+ if
not found
:
# HTML5 video
# HTML5 video
-
mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL
)
- if
mobj is None
:
-
mobj = re.search
(
+
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage
)
+ if
not found
:
+
found = re.findall
(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
webpage)
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
webpage)
- if
mobj
:
- new_url =
mobj
.group(1)
+ if
found
:
+ new_url =
found
.group(1)
self.report_following_redirect(new_url)
return {
'_type': 'url',
'url': new_url,
}
self.report_following_redirect(new_url)
return {
'_type': 'url',
'url': new_url,
}
- if
mobj is None
:
+ if
not found
:
raise ExtractorError('Unsupported URL: %s' % url)
raise ExtractorError('Unsupported URL: %s' % url)
- # It's possible that one of the regexes
-
# matched, but returned an empty group
:
- if mobj.group(1) is None:
-
raise ExtractorError('Did not find a valid video URL at %s' % url
)
+ entries = []
+
for video_url in found
:
+ video_url = compat_urlparse.urljoin(url, video_url)
+
video_id = compat_urllib_parse.unquote(os.path.basename(video_url)
)
- video_url = mobj.group(1)
- video_url = compat_urlparse.urljoin(url, video_url)
- video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
+ # Sometimes, jwplayer extraction will result in a YouTube URL
+ if YoutubeIE.suitable(video_url):
+ entries.append(self.url_result(video_url, 'Youtube'))
+ continue
- # Sometimes, jwplayer extraction will result in a YouTube URL
- if YoutubeIE.suitable(video_url):
- return self.url_result(video_url, 'Youtube')
+ # here's a fun little line of code for you:
+ video_id = os.path.splitext(video_id)[0]
- # here's a fun little line of code for you:
- video_id = os.path.splitext(video_id)[0]
+ entries.append({
+ 'id': video_id,
+ 'url': video_url,
+ 'uploader': video_uploader,
+ 'title': video_title,
+ })
+
+ if len(entries) == 1:
+ return entries[1]
+ else:
+ for num, e in enumerate(entries, start=1):
+ e['title'] = '%s (%d)' % (e['title'], num)
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ }
- return {
- 'id': video_id,
- 'url': video_url,
- 'uploader': video_uploader,
- 'title': video_title,
- }