projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Merge branch 'googledrive' of github.com:remitamine/youtube-dl into remitamine-google...
[youtube-dl]
/
youtube_dl
/
extractor
/
generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 6cffde20d907fb96b9ecf53e11e610b9ca0acd2c..7cf13fddfe37fa1bf2d2e6329c060f9d4c6286f7 100644
(file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -11,7 +11,6 @@
from .youtube import YoutubeIE
from ..compat import (
compat_etree_fromstring,
compat_urllib_parse_unquote,
from ..compat import (
compat_etree_fromstring,
compat_urllib_parse_unquote,
- compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
)
compat_urlparse,
compat_xml_parse_error,
)
@@ -22,6 +21,7 @@
from ..utils import (
HEADRequest,
is_html,
orderedSet,
HEADRequest,
is_html,
orderedSet,
+ sanitized_Request,
smuggle_url,
unescapeHTML,
unified_strdate,
smuggle_url,
unescapeHTML,
unified_strdate,
@@ -54,6 +54,8 @@
from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
+from .pladform import PladformIE
+from .googledrive import GoogleDriveIE
class GenericIE(InfoExtractor):
class GenericIE(InfoExtractor):
@@ -339,6 +341,7 @@
class GenericIE(InfoExtractor):
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
'ext': 'mp4',
'title': '2cc213299525360.mov', # that's what we get
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
'ext': 'mp4',
'title': '2cc213299525360.mov', # that's what we get
+ 'duration': 238.231,
},
'add_ie': ['Ooyala'],
},
},
'add_ie': ['Ooyala'],
},
@@ -350,6 +353,7 @@
class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': '"Steve Jobs: Man in the Machine" trailer',
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
'ext': 'mp4',
'title': '"Steve Jobs: Man in the Machine" trailer',
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
+ 'duration': 135.427,
},
'params': {
'skip_download': True,
},
'params': {
'skip_download': True,
@@ -960,8 +964,9 @@
class GenericIE(InfoExtractor):
'info_dict': {
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
'ext': 'mp4',
'info_dict': {
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
'ext': 'mp4',
- 'description': 'VIDEO: Index/Match versus VLOOKUP.',
+ 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
'title': 'This is what separates the Excel masters from the wannabes',
'title': 'This is what separates the Excel masters from the wannabes',
+ 'duration': 191.933,
},
'params': {
# m3u8 downloads
},
'params': {
# m3u8 downloads
@@ -1058,6 +1063,20 @@
class GenericIE(InfoExtractor):
'description': 'Tabletop: Dread, Last Thoughts',
'duration': 51690,
},
'description': 'Tabletop: Dread, Last Thoughts',
'duration': 51690,
},
+ },
+ # JWPlayer with M3U8
+ {
+ 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
+ 'info_dict': {
+ 'id': 'playlist',
+ 'ext': 'mp4',
+ 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
+ 'uploader': 'ren.tv',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ }
}
]
}
]
@@ -1201,7 +1220,7 @@
class GenericIE(InfoExtractor):
full_response = None
if head_response is False:
full_response = None
if head_response is False:
- request = compat_urllib_request.Request(url)
+ request = sanitized_Request(url)
request.add_header('Accept-Encoding', '*')
full_response = self._request_webpage(request, video_id)
head_response = full_response
request.add_header('Accept-Encoding', '*')
full_response = self._request_webpage(request, video_id)
head_response = full_response
@@ -1230,7 +1249,7 @@
class GenericIE(InfoExtractor):
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
if not full_response:
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
if not full_response:
- request = compat_urllib_request.Request(url)
+ request = sanitized_Request(url)
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
# making it impossible to download only chunk of the file (yet we need only 512kB to
# test whether it's HTML or not). According to youtube-dl default Accept-Encoding
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
# making it impossible to download only chunk of the file (yet we need only 512kB to
# test whether it's HTML or not). According to youtube-dl default Accept-Encoding
@@ -1487,7 +1506,7 @@
class GenericIE(InfoExtractor):
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
- return OoyalaIE._build_url_result(mobj.group('ec'))
+ return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))
# Look for multiple Ooyala embeds on SBN network websites
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
# Look for multiple Ooyala embeds on SBN network websites
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
@@ -1495,7 +1514,7 @@
class GenericIE(InfoExtractor):
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
if embeds:
return _playlist_from_matches(
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
if embeds:
return _playlist_from_matches(
- embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
+ embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
# Look for Aparat videos
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
# Look for Aparat videos
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@ -1724,10 +1743,9 @@
class GenericIE(InfoExtractor):
return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
# Look for Pladform embeds
return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
# Look for Pladform embeds
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Pladform')
+ pladform_url = PladformIE._extract_url(webpage)
+ if pladform_url:
+ return self.url_result(pladform_url)
# Look for Playwire embeds
mobj = re.search(
# Look for Playwire embeds
mobj = re.search(
@@ -1752,6 +1770,11 @@
class GenericIE(InfoExtractor):
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+ # Look for Google Drive embeds
+ google_drive_url = GoogleDriveIE._extract_url(webpage)
+ if google_drive_url:
+ return self.url_result(google_drive_url, 'GoogleDrive')
+
# Look for UDN embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
# Look for UDN embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
@@ -1886,25 +1909,24 @@
class GenericIE(InfoExtractor):
# here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0]
# here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0]
+ entry_info_dict = {
+ 'id': video_id,
+ 'uploader': video_uploader,
+ 'title': video_title,
+ 'age_limit': age_limit,
+ }
+
ext = determine_ext(video_url)
if ext == 'smil':
ext = determine_ext(video_url)
if ext == 'smil':
- entries.append({
- 'id': video_id,
- 'formats': self._extract_smil_formats(video_url, video_id),
- 'uploader': video_uploader,
- 'title': video_title,
- 'age_limit': age_limit,
- })
+ entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
elif ext == 'xspf':
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
elif ext == 'xspf':
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
+ elif ext == 'm3u8':
+ entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
else:
else:
- entries.append({
- 'id': video_id,
- 'url': video_url,
- 'uploader': video_uploader,
- 'title': video_title,
- 'age_limit': age_limit,
- })
+ entry_info_dict['url'] = video_url
+
+ entries.append(entry_info_dict)
if len(entries) == 1:
return entries[0]
if len(entries) == 1:
return entries[0]