projects / youtube-dl / blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary | shortlog | log | commit | commitdiff | tree
raw | inline | side by side
[FreeVideo] Add new extractor
[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 450c7dfd69d0000c810f18ef35741aae05221c40..b77f0e51904d1539a43c056dafbc82a587041744 100644
(file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -12,13 +12,14 @@ import sys
import time
import xml.etree.ElementTree
import time
import xml.etree.ElementTree
-from ..utils import (
+from ..compat import (
compat_http_client,
compat_urllib_error,
compat_urllib_parse_urlparse,
compat_urlparse,
compat_str,
compat_http_client,
compat_urllib_error,
compat_urllib_parse_urlparse,
compat_urlparse,
compat_str,
-
+)
+from ..utils import (
clean_html,
compiled_regex_type,
ExtractorError,
clean_html,
compiled_regex_type,
ExtractorError,
@@ -72,6 +73,7 @@ class InfoExtractor(object):
* acodec Name of the audio codec in use
* asr Audio sampling rate in Hertz
* vbr Average video bitrate in KBit/s
* acodec Name of the audio codec in use
* asr Audio sampling rate in Hertz
* vbr Average video bitrate in KBit/s
+ * fps Frame rate
* vcodec Name of the video codec in use
* container Name of the container format
* filesize The number of bytes, if known in advance
* vcodec Name of the video codec in use
* container Name of the container format
* filesize The number of bytes, if known in advance
@@ -89,6 +91,10 @@ class InfoExtractor(object):
format, irrespective of the file format.
-1 for default (order by other properties),
-2 or smaller for less than default.
format, irrespective of the file format.
-1 for default (order by other properties),
-2 or smaller for less than default.
+ * source_preference Order number for this video source
+ (quality takes higher priority)
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
* http_referer HTTP Referer header value to set.
* http_method HTTP method to use for the download.
* http_headers A dictionary of additional HTTP headers
* http_referer HTTP Referer header value to set.
* http_method HTTP method to use for the download.
* http_headers A dictionary of additional HTTP headers
@@ -238,7 +244,6 @@ class InfoExtractor(object):
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns a tuple (page content as string, URL handle) """
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns a tuple (page content as string, URL handle) """
-
# Strip hashes from the URL (#1038)
if isinstance(url_or_request, (compat_str, str)):
url_or_request = url_or_request.partition('#')[0]
# Strip hashes from the URL (#1038)
if isinstance(url_or_request, (compat_str, str)):
url_or_request = url_or_request.partition('#')[0]
@@ -247,6 +252,10 @@ class InfoExtractor(object):
if urlh is False:
assert not fatal
return False
if urlh is False:
assert not fatal
return False
+ content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+ return (content, urlh)
+
+ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
@@ -281,6 +290,12 @@ class InfoExtractor(object):
raw_filename = basen + '.dump'
filename = sanitize_filename(raw_filename, restricted=True)
self.to_screen('Saving request to ' + filename)
raw_filename = basen + '.dump'
filename = sanitize_filename(raw_filename, restricted=True)
self.to_screen('Saving request to ' + filename)
+ # Working around MAX_PATH limitation on Windows (see
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+ if os.name == 'nt':
+ absfilepath = os.path.abspath(filename)
+ if len(absfilepath) > 259:
+ filename = '\\\\?\\' + absfilepath
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
@@ -299,7 +314,7 @@ class InfoExtractor(object):
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
- return (content, urlh)
+ return content
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns the data of the page as a string """
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns the data of the page as a string """
@@ -389,7 +404,7 @@ class InfoExtractor(object):
video_info['title'] = playlist_title
return video_info
video_info['title'] = playlist_title
return video_info
- def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
+ def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group.
"""
Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group.
@@ -410,8 +425,11 @@ class InfoExtractor(object):
_name = name
if mobj:
_name = name
if mobj:
- # return the first matching group
- return next(g for g in mobj.groups() if g is not None)
+ if group is None:
+ # return the first matching group
+ return next(g for g in mobj.groups() if g is not None)
+ else:
+ return mobj.group(group)
elif default is not _NO_DEFAULT:
return default
elif fatal:
elif default is not _NO_DEFAULT:
return default
elif fatal:
@@ -421,11 +439,11 @@ class InfoExtractor(object):
'please report this issue on http://yt-dl.org/bug' % _name)
return None
'please report this issue on http://yt-dl.org/bug' % _name)
return None
- def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
+ def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Like _search_regex, but strips HTML tags and unescapes entities.
"""
"""
Like _search_regex, but strips HTML tags and unescapes entities.
"""
- res = self._search_regex(pattern, string, name, default, fatal, flags)
+ res = self._search_regex(pattern, string, name, default, fatal, flags, group)
if res:
return clean_html(res).strip()
else:
if res:
return clean_html(res).strip()
else:
@@ -519,9 +537,9 @@ class InfoExtractor(object):
display_name = name
return self._html_search_regex(
r'''(?ix)<meta
display_name = name
return self._html_search_regex(
r'''(?ix)<meta
- (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
- [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
- html, display_name, fatal=fatal, **kwargs)
+ (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
+ [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+ html, display_name, fatal=fatal, group='content', **kwargs)
def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader')
def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader')
@@ -605,14 +623,16 @@ class InfoExtractor(object):
f.get('vbr') if f.get('vbr') is not None else -1,
f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference,
f.get('vbr') if f.get('vbr') is not None else -1,
f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference,
+ f.get('fps') if f.get('fps') is not None else -1,
f.get('filesize') if f.get('filesize') is not None else -1,
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
f.get('filesize') if f.get('filesize') is not None else -1,
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
+ f.get('source_preference') if f.get('source_preference') is not None else -1,
f.get('format_id'),
)
formats.sort(key=_formats_key)
def http_scheme(self):
f.get('format_id'),
)
formats.sort(key=_formats_key)
def http_scheme(self):
- """ Either "https:" or "https:", depending on the user's preferences """
+ """ Either "http:" or "https:", depending on the user's preferences """
return (
'http:'
if self._downloader.params.get('prefer_insecure', False)
return (
'http:'
if self._downloader.params.get('prefer_insecure', False)
@@ -675,7 +695,10 @@ class InfoExtractor(object):
if re.match(r'^https?://', u)
else compat_urlparse.urljoin(m3u8_url, u))
if re.match(r'^https?://', u)
else compat_urlparse.urljoin(m3u8_url, u))
- m3u8_doc = self._download_webpage(m3u8_url, video_id)
+ m3u8_doc = self._download_webpage(
+ m3u8_url, video_id,
+ note='Downloading m3u8 information',
+ errnote='Failed to download m3u8 information')
last_info = None
kv_rex = re.compile(
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
last_info = None
kv_rex = re.compile(
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')