projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Switch codebase to use sanitized_Request instead of compat_urllib_request.Request
[youtube-dl]
/
youtube_dl
/
extractor
/
common.py
diff --git
a/youtube_dl/extractor/common.py
b/youtube_dl/extractor/common.py
index 720033ddf04347caefd22e031466708f74c793b8..eb9bfa3d15a2c5084fbf67f05a401474ad2f881d 100644
(file)
--- a/
youtube_dl/extractor/common.py
+++ b/
youtube_dl/extractor/common.py
@@
-10,20
+10,18
@@
import re
import socket
import sys
import time
import socket
import sys
import time
-import xml.etree.ElementTree
from ..compat import (
compat_cookiejar,
compat_cookies,
compat_getpass,
from ..compat import (
compat_cookiejar,
compat_cookies,
compat_getpass,
- compat_HTTPError,
compat_http_client,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_http_client,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_parse_urlparse,
- compat_urllib_request,
compat_urlparse,
compat_str,
compat_urlparse,
compat_str,
+ compat_etree_fromstring,
)
from ..utils import (
NO_DEFAULT,
)
from ..utils import (
NO_DEFAULT,
@@
-38,6
+36,7
@@
from ..utils import (
int_or_none,
RegexNotFoundError,
sanitize_filename,
int_or_none,
RegexNotFoundError,
sanitize_filename,
+ sanitized_Request,
unescapeHTML,
unified_strdate,
url_basename,
unescapeHTML,
unified_strdate,
url_basename,
@@
-311,11
+310,11
@@
class InfoExtractor(object):
@classmethod
def ie_key(cls):
"""A string for getting the InfoExtractor with get_info_extractor"""
@classmethod
def ie_key(cls):
"""A string for getting the InfoExtractor with get_info_extractor"""
- return cls.__name__[:-2]
+ return compat_str(cls.__name__[:-2])
@property
def IE_NAME(self):
@property
def IE_NAME(self):
- return type(self).__name__[:-2]
+ return compat_str(type(self).__name__[:-2])
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns the response handle """
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns the response handle """
@@
-462,7
+461,7
@@
class InfoExtractor(object):
return xml_string
if transform_source:
xml_string = transform_source(xml_string)
return xml_string
if transform_source:
xml_string = transform_source(xml_string)
- return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
+ return compat_etree_fromstring(xml_string.encode('utf-8'))
def _download_json(self, url_or_request, video_id,
note='Downloading JSON metadata',
def _download_json(self, url_or_request, video_id,
note='Downloading JSON metadata',
@@
-892,6
+891,11
@@
class InfoExtractor(object):
if not media_nodes:
manifest_version = '2.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
if not media_nodes:
manifest_version = '2.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
+ base_url = xpath_text(
+ manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
+ 'base URL', default=None)
+ if base_url:
+ base_url = base_url.strip()
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
@@
-899,7
+903,7
@@
class InfoExtractor(object):
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
- else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
+ else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
@@
-944,13
+948,15
@@
class InfoExtractor(object):
if re.match(r'^https?://', u)
else compat_urlparse.urljoin(m3u8_url, u))
if re.match(r'^https?://', u)
else compat_urlparse.urljoin(m3u8_url, u))
- m3u8_doc = self._download_webpage(
+ res = self._download_webpage_handle(
m3u8_url, video_id,
note=note or 'Downloading m3u8 information',
errnote=errnote or 'Failed to download m3u8 information',
fatal=fatal)
m3u8_url, video_id,
note=note or 'Downloading m3u8 information',
errnote=errnote or 'Failed to download m3u8 information',
fatal=fatal)
- if m3u8_doc is False:
- return m3u8_doc
+ if res is False:
+ return res
+ m3u8_doc, urlh = res
+ m3u8_url = urlh.geturl()
last_info = None
last_media = None
kv_rex = re.compile(
last_info = None
last_media = None
kv_rex = re.compile(
@@
-1279,7
+1285,7
@@
class InfoExtractor(object):
def _get_cookies(self, url):
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
def _get_cookies(self, url):
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
- req = compat_urllib_request.Request(url)
+ req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies.SimpleCookie(req.get_header('Cookie'))