Merge pull request #8819 from remitamine/simple-webpage-requests
author: remitamine <remitamine@gmail.com>
Fri, 11 Mar 2016 17:19:43 +0000 (18:19 +0100)
committer: remitamine <remitamine@gmail.com>
Fri, 11 Mar 2016 17:19:43 +0000 (18:19 +0100)
[extractor/common] simplify using data, headers and query params with _download_* methods

1  2 
youtube_dl/extractor/common.py

index 52b4c125aa649fa0ca19b9f0d292d6af7fa04c32,d2443c93ceaaead83805a72bbefc7a3a70683c0d..aaca25a12a872eabfe3f43734e0599e7e3d4d78e
@@@ -48,6 -48,7 +48,7 @@@ from ..utils import 
      determine_protocol,
      parse_duration,
      mimetype2ext,
+     update_url_query,
  )
  
  
@@@ -345,7 -346,7 +346,7 @@@ class InfoExtractor(object)
      def IE_NAME(self):
          return compat_str(type(self).__name__[:-2])
  
-     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None):
          """ Returns the response handle """
          if note is None:
              self.report_download_webpage(video_id)
                  self.to_screen('%s' % (note,))
              else:
                  self.to_screen('%s: %s' % (video_id, note))
+         # data, headers and query params will be ignored for `Request` objects
+         if isinstance(url_or_request, compat_str):
+             if query:
+                 url_or_request = update_url_query(url_or_request, query)
+             if data or headers:
+                 url_or_request = sanitized_Request(url_or_request, data, headers or {})
          try:
              return self._downloader.urlopen(url_or_request)
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                  self._downloader.report_warning(errmsg)
                  return False
  
-     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None):
+     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers=None, query=None):
          """ Returns a tuple (page content as string, URL handle) """
          # Strip hashes from the URL (#1038)
          if isinstance(url_or_request, (compat_str, str)):
              url_or_request = url_or_request.partition('#')[0]
  
-         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
+         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
          if urlh is False:
              assert not fatal
              return False
  
          return content
  
-     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
+     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers=None, query=None):
          """ Returns the data of the page as a string """
          success = False
          try_count = 0
          while success is False:
              try:
-                 res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+                 res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query)
                  success = True
              except compat_http_client.IncompleteRead as e:
                  try_count += 1
  
      def _download_xml(self, url_or_request, video_id,
                        note='Downloading XML', errnote='Unable to download XML',
-                       transform_source=None, fatal=True, encoding=None):
+                       transform_source=None, fatal=True, encoding=None, data=None, headers=None, query=None):
          """Return the xml as an xml.etree.ElementTree.Element"""
          xml_string = self._download_webpage(
-             url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding)
+             url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
          if xml_string is False:
              return xml_string
          if transform_source:
                         note='Downloading JSON metadata',
                         errnote='Unable to download JSON metadata',
                         transform_source=None,
-                        fatal=True, encoding=None):
+                        fatal=True, encoding=None, data=None, headers=None, query=None):
          json_string = self._download_webpage(
              url_or_request, video_id, note, errnote, fatal=fatal,
-             encoding=encoding)
+             encoding=encoding, data=data, headers=headers, query=query)
          if (not fatal) and json_string is False:
              return None
          return self._parse_json(
                          continue
                      representation_attrib = adaptation_set.attrib.copy()
                      representation_attrib.update(representation.attrib)
 -                    mime_type = representation_attrib.get('mimeType')
 -                    content_type = mime_type.split('/')[0] if mime_type else representation_attrib.get('contentType')
 +                    # According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory
 +                    mime_type = representation_attrib['mimeType']
 +                    content_type = mime_type.split('/')[0]
                      if content_type == 'text':
                          # TODO implement WebVTT downloading
                          pass
                          f = {
                              'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
                              'url': base_url,
 +                            'ext': mimetype2ext(mime_type),
                              'width': int_or_none(representation_attrib.get('width')),
                              'height': int_or_none(representation_attrib.get('height')),
                              'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),