Merge remote-tracking branch 'diffycat/jpopsuki'
author Philipp Hagemeister <phihag@phihag.de>
Fri, 3 Jan 2014 11:20:18 +0000 (12:20 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Fri, 3 Jan 2014 11:20:18 +0000 (12:20 +0100)
19 files changed:
README.md
test/test_playlists.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/downloader/http.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/cmt.py [new file with mode: 0644]
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/dreisat.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/imdb.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/wistia.py
youtube_dl/extractor/youtube.py
youtube_dl/utils.py
youtube_dl/version.py

index 91e18e372b9a803dc7358f2e6d4af3b31eb754a5..0070617d4dce5bdd0ff3628bd232313f287498ad 100644 (file)
--- a/README.md
+++ b/README.md
@@ -34,9 +34,11 @@ which means you can modify it, redistribute it or use it however you like.
                                empty string (--proxy "") for direct connection
     --no-check-certificate     Suppress HTTPS certificate validation.
     --cache-dir DIR            Location in the filesystem where youtube-dl can
-                               store downloaded information permanently. By
+                               store some downloaded information permanently. By
                                default $XDG_CACHE_HOME/youtube-dl or ~/.cache
-                               /youtube-dl .
+                               /youtube-dl . At the moment, only YouTube player
+                               files (for videos with obfuscated signatures) are
+                               cached, but that may change.
     --no-cache-dir             Disable filesystem caching
     --bidi-workaround          Work around terminals that lack bidirectional
                                text support. Requires bidiv or fribidi
@@ -335,3 +337,7 @@ In particular, every site support request issue should only pertain to services
 ###  Is anyone going to need the feature?
 
 Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+
+###  Is your question about youtube-dl?
+
+It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
index 1b7b4e3d808cb936fa5fac07136049bd174a4490..9d522b3574c221bf6594cd9965ea17bf47b671c6 100644 (file)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -28,7 +28,8 @@ from youtube_dl.extractor import (
     BandcampAlbumIE,
     SmotriCommunityIE,
     SmotriUserIE,
-    IviCompilationIE
+    IviCompilationIE,
+    ImdbListIE,
 )
 
 
@@ -187,6 +188,15 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], u'dezhurnyi_angel/season2')
         self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
         self.assertTrue(len(result['entries']) >= 20)
+        
+    def test_imdb_list(self):
+        dl = FakeYDL()
+        ie = ImdbListIE(dl)
+        result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'sMjedvGDd8U')
+        self.assertEqual(result['title'], u'Animated and Family Films')
+        self.assertTrue(len(result['entries']) >= 48)
 
 
 if __name__ == '__main__':
index a9a3639d7f7a32053990f0b41e487b837a704767..08037deda48c05c74e40e989fee4af34887ae6fa 100644 (file)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -148,6 +148,7 @@ class YoutubeDL(object):
     socket_timeout:    Time to wait for unresponsive hosts, in seconds
     bidi_workaround:   Work around buggy terminals without bidirectional text
                        support, using fridibi
+    debug_printtraffic:Print out sent and received HTTP traffic
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -164,6 +165,8 @@ class YoutubeDL(object):
 
     def __init__(self, params=None):
         """Create a FileDownloader object with the given options."""
+        if params is None:
+            params = {}
         self._ies = []
         self._ies_instances = {}
         self._pps = []
@@ -172,7 +175,7 @@ class YoutubeDL(object):
         self._num_downloads = 0
         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
         self._err_file = sys.stderr
-        self.params = {} if params is None else params
+        self.params = params
 
         if params.get('bidi_workaround', False):
             try:
@@ -1014,7 +1017,7 @@ class YoutubeDL(object):
     def list_formats(self, info_dict):
         def format_note(fdict):
             res = u''
-            if f.get('ext') in ['f4f', 'f4m']:
+            if fdict.get('ext') in ['f4f', 'f4m']:
                 res += u'(unsupported) '
             if fdict.get('format_note') is not None:
                 res += fdict['format_note'] + u' '
@@ -1124,10 +1127,13 @@ class YoutubeDL(object):
             if 'http' in proxies and 'https' not in proxies:
                 proxies['https'] = proxies['http']
         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+
+        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
         https_handler = make_HTTPS_handler(
-            self.params.get('nocheckcertificate', False))
+            self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
+        ydlh = YoutubeDLHandler(debuglevel=debuglevel)
         opener = compat_urllib_request.build_opener(
-            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+            https_handler, proxy_handler, cookie_processor, ydlh)
         # Delete the default user-agent header, which would otherwise apply in
         # cases where our custom HTTP handler doesn't come into play
         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
index 657e3fd07eb6dd92441c6fd11613a208848adc5a..b29cf6758d0354e6ff34b8823ea1606544a7f5a1 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -186,7 +186,7 @@ def parseOpts(overrideArguments=None):
     general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
     general.add_option(
         '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
-        help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
+        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
     general.add_option(
         '--no-cache-dir', action='store_const', const=None, dest='cachedir',
         help='Disable filesystem caching')
@@ -334,7 +334,9 @@ def parseOpts(overrideArguments=None):
     verbosity.add_option('--youtube-print-sig-code',
             action='store_true', dest='youtube_print_sig_code', default=False,
             help=optparse.SUPPRESS_HELP)
-
+    verbosity.add_option('--print-traffic',
+            dest='debug_printtraffic', action='store_true', default=False,
+            help=optparse.SUPPRESS_HELP)
 
     filesystem.add_option('-t', '--title',
             action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
@@ -696,6 +698,7 @@ def _real_main(argv=None):
         'proxy': opts.proxy,
         'socket_timeout': opts.socket_timeout,
         'bidi_workaround': opts.bidi_workaround,
+        'debug_printtraffic': opts.debug_printtraffic,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
index 14b88efd3d951345effe95959f72a82575e094d5..8407727ba43df75a31a53d073cbd430877157b29 100644 (file)
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -133,7 +133,7 @@ class HttpFD(FileDownloader):
                     return False
             try:
                 stream.write(data_block)
-            except (IOError, OSError):
+            except (IOError, OSError) as err:
                 self.to_stderr(u"\n")
                 self.report_error(u'unable to write data: %s' % str(err))
                 return False
index b59110b15d58af909fd9b08c4b36164dc69b7174..9c1374373a3adeb1f64904db9d2900979a7fe7d6 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -28,6 +28,7 @@ from .channel9 import Channel9IE
 from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .clipsyndicate import ClipsyndicateIE
+from .cmt import CMTIE
 from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
@@ -79,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
 from .ign import IGNIE, OneUPIE
-from .imdb import ImdbIE
+from .imdb import (
+    ImdbIE,
+    ImdbListIE
+)
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py
new file mode 100644 (file)
index 0000000..88e0e9a
--- /dev/null
+++ b/youtube_dl/extractor/cmt.py
@@ -0,0 +1,19 @@
+from .mtv import MTVIE
+
+class CMTIE(MTVIE):
+    IE_NAME = u'cmt.com'
+    _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
+    _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
+
+    _TESTS = [
+        {
+            u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
+            u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2',
+            u'info_dict': {
+                u'id': u'989124',
+                u'ext': u'mp4',
+                u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
+                u'description': u'Blame It All On My Roots',
+            },
+        },
+    ]
index a54ce3ee7c44727a9e56b1ab8359bd099b48bb35..27bd8256e6bf6dfb8dc7ae7997a29c2162d863d2 100644 (file)
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -12,7 +12,9 @@ from ..utils import (
 
 
 class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
+    _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
+        (video-clips|episodes|cc-studios|video-collections)
+        /(?P<title>.*)'''
     _FEED_URL = u'http://comedycentral.com/feeds/mrss/'
 
     _TEST = {
index f34d36cb05fb69533a3dd8ed2de722cb6ada9eea..f7247752e0c510367a3ce342546790d6b57ef922 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -69,7 +69,8 @@ class InfoExtractor(object):
                                  download, lower-case.
                                  "http", "https", "rtsp", "rtmp" or so.
                     * preference Order number of this format. If this field is
-                                 present, the formats get sorted by this field.
+                                 present and not None, the formats get sorted
+                                 by this field.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
     url:            Final video URL.
index 416e25156e321333d1896983ced60a1e85c16ada..0b11d1f10e18e4358b35f76d0a0e0816b00eaa4c 100644 (file)
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -10,11 +10,11 @@ from ..utils import (
 
 class DreiSatIE(InfoExtractor):
     IE_NAME = '3sat'
-    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
     _TEST = {
         u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
-        u'file': u'36983.webm',
-        u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
+        u'file': u'36983.mp4',
+        u'md5': u'9dcfe344732808dbfcc901537973c922',
         u'info_dict': {
             u"title": u"Kaffeeland Schweiz",
             u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...", 
index 7a14c98f9b6ef9d550606c72c330d0730ec1233e..377ae91c4383c78c0749937650097d231a7f2ca8 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -162,6 +162,8 @@ class GenericIE(InfoExtractor):
             return self.url_result('http://' + url)
         video_id = os.path.splitext(url.split('/')[-1])[0]
 
+        self.to_screen(u'%s: Requesting header' % video_id)
+
         try:
             response = self._send_head(url)
 
index e5332cce820ca239c915da402107a77143f0484b..16926b4d391bdc11801510797c26481610b928e3 100644 (file)
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor):
             'description': descr,
             'thumbnail': format_info['slate'],
         }
+
+class ImdbListIE(InfoExtractor):
+    IE_NAME = u'imdb:list'
+    IE_DESC = u'Internet Movie Database lists'
+    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+    
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        list_id = mobj.group('id')
+        
+        # RSS XML is sometimes malformed
+        rss = self._download_webpage('http://rss.imdb.com/list/%s' % list_id, list_id, u'Downloading list RSS')
+        list_title = self._html_search_regex(r'<title>(.*?)</title>', rss, u'list title')
+        
+        # Export is independent of actual author_id, but returns 404 if no author_id is provided.
+        # However, passing dummy author_id seems to be enough.
+        csv = self._download_webpage('http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id,
+                                     list_id, u'Downloading list CSV')
+        
+        entries = []
+        for item in csv.split('\n')[1:]:
+            cols = item.split(',')
+            if len(cols) < 2:
+                continue
+            item_id = cols[1][1:-1]
+            if item_id.startswith('vi'):
+                entries.append(self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb'))
+        
+        return self.playlist_result(entries, list_id, list_title)
\ No newline at end of file
index 125d81551c26ea67eff82f2d2189bd058d16b873..7c54ea0f4c7f351161adae175edca2743ab55266 100644 (file)
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -53,7 +53,7 @@ class MixcloudIE(InfoExtractor):
         info = json.loads(json_data)
 
         preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
-        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
+        song_url = preview_url.replace('/previews/', '/c/originals/')
         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
         final_song_url = self._get_url(template_url)
         if final_song_url is None:
index ed11f521aa02aa3fe421b8fc743b0a26b1e1cdd0..f1cf41e2dbf2012764fdb0f2e1745c07ecdef055 100644 (file)
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -129,7 +129,7 @@ class MTVIE(MTVServicesInfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('videoid')
-        uri = mobj.group('mgid')
+        uri = mobj.groupdict().get('mgid')
         if uri is None:
             webpage = self._download_webpage(url, video_id)
     
index e22ff9c387ab0e01c1e6fcb1da793af877f37a5c..951e977bd0ba014340fe3eeb626723bde258e0dd 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
                             (?!sets/)(?P<title>[\w\d-]+)/?
                             (?P<token>[^?]+?)?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
-                       |(?P<widget>w\.soundcloud\.com/player/?.*?url=.*)
+                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
                     )
                     '''
     IE_NAME = u'soundcloud'
@@ -193,7 +193,7 @@ class SoundcloudIE(InfoExtractor):
         if track_id is not None:
             info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
             full_title = track_id
-        elif mobj.group('widget'):
+        elif mobj.group('player'):
             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
             return self.url_result(query['url'][0], ie='Soundcloud')
         else:
index 584550455ad8dbf611424ad6606411850dd72d3e..bc31c2e64f22999adf575e60d59bde3d903bb9cc 100644 (file)
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@@ -44,6 +44,7 @@ class WistiaIE(InfoExtractor):
                 'height': a['height'],
                 'filesize': a['size'],
                 'ext': a['ext'],
+                'preference': 1 if atype == 'original' else None,
             })
 
         self._sort_formats(formats)
index b0e29c2a8a5d8c7f6c4c0109ca09afa204d3b30b..9424d5e2669a72e791a0ba0a0120de0bfec27fc8 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -194,6 +194,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
         '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},
         '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
+        '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
 
         # Dash mp4 audio
         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
index 83a274043388ed9acedf175ab6ebb7783e1fbfc6..da5143c8ef50eb936abc4a7de98c33f7aa10c5b5 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -539,7 +539,8 @@ def formatSeconds(secs):
     else:
         return '%d' % secs
 
-def make_HTTPS_handler(opts_no_check_certificate):
+
+def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
     if sys.version_info < (3, 2):
         import httplib
 
@@ -560,7 +561,7 @@ def make_HTTPS_handler(opts_no_check_certificate):
         class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
             def https_open(self, req):
                 return self.do_open(HTTPSConnectionV3, req)
-        return HTTPSHandlerV3()
+        return HTTPSHandlerV3(**kwargs)
     else:
         context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
         context.verify_mode = (ssl.CERT_NONE
@@ -571,7 +572,7 @@ def make_HTTPS_handler(opts_no_check_certificate):
             context.load_default_certs()
         except AttributeError:
             pass  # Python < 3.4
-        return compat_urllib_request.HTTPSHandler(context=context)
+        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 
 class ExtractorError(Exception):
     """Error during info extraction."""
index 332913b31ff19bbea6c743e3bca333d88d39722a..bf5fc8212660cdbe1004836bfb7f6da301b1df21 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
 
-__version__ = '2013.12.26'
+__version__ = '2014.01.03'