[huffpost] Add support

[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 56c54a5ce2627ecc9d488fbe12901c689fefeb3b..db1ca9edb446568479d3770604b42ae66c2ddd75 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -66,11 +66,12 @@ class InfoExtractor(object):
                      * asr        Audio sampling rate in Hertz
                      * vbr        Average video bitrate in KBit/s
                      * vcodec     Name of the video codec in use
+                    * container  Name of the container format
                      * filesize   The number of bytes, if known in advance
                      * player_url SWF Player URL (used for rtmpdump).
                      * protocol   The protocol that will be used for the actual
                                   download, lower-case.
-                                 "http", "https", "rtsp", "rtmp" or so.
+                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
                      * preference Order number of this format. If this field is
                                   present and not None, the formats get sorted
                                   by this field.
@@ -221,6 +222,8 @@ class InfoExtractor(object):
                            webpage_bytes[:1024])
              if m:
                  encoding = m.group(1).decode('ascii')
+            elif webpage_bytes.startswith(b'\xff\xfe'):
+                encoding = 'utf-16'
              else:
                  encoding = 'utf-8'
          if self._downloader.params.get('dump_intermediate_pages', False):
@@ -237,7 +240,7 @@ class InfoExtractor(object):
              except AttributeError:
                  url = url_or_request
              if len(url) > 200:
-                h = hashlib.md5(url).hexdigest()
+                h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest()
                  url = url[:200 - len(h)] + h
              raw_filename = ('%s_%s.dump' % (video_id, url))
              filename = sanitize_filename(raw_filename, restricted=True)