Merge pull request #1438 from rzhxeo/fktv
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Wed, 18 Sep 2013 21:04:54 +0000 (23:04 +0200)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Wed, 18 Sep 2013 21:05:56 +0000 (23:05 +0200)
Add support for http://fernsehkritik.tv

28 files changed:
devscripts/buildserver.py [new file with mode: 0644]
devscripts/release.sh
devscripts/youtube_genalgo.py
test/test_all_urls.py
test/test_dailymotion_subtitles.py
test/test_playlists.py
test/test_youtube_subtitles.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/archiveorg.py
youtube_dl/extractor/bloomberg.py [new file with mode: 0644]
youtube_dl/extractor/dreisat.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/googleplus.py
youtube_dl/extractor/hotnewhiphop.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/newgrounds.py [new file with mode: 0644]
youtube_dl/extractor/ooyala.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/southparkstudios.py [new file with mode: 0644]
youtube_dl/extractor/subtitles.py
youtube_dl/extractor/trilulilu.py
youtube_dl/extractor/vice.py [new file with mode: 0644]
youtube_dl/extractor/xhamster.py
youtube_dl/extractor/youtube.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
new file mode 100644 (file)
index 0000000..e0c3cc8
--- /dev/null
@@ -0,0 +1,405 @@
+#!/usr/bin/python3
+
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from socketserver import ThreadingMixIn
+import argparse
+import ctypes
+import functools
+import os.path
+import shutil
+import subprocess
+import sys
+import tempfile
+import threading
+import traceback
+import urllib.parse as urlparse
+import winreg as _winreg
+
+
+class BuildHTTPServer(ThreadingMixIn, HTTPServer):
+    """HTTP server that handles every request in a separate thread."""
+
+    # Allow the listening address to be rebound right after a restart.
+    allow_reuse_address = True
+
+
+# Handle to advapi32.dll, which exports the service-control and event-log
+# functions called below. ctypes.windll is only available on Windows.
+advapi32 = ctypes.windll.advapi32
+
+# Numeric flags handed to the advapi32 calls in this module.
+SC_MANAGER_ALL_ACCESS = 0xf003f
+SC_MANAGER_CREATE_SERVICE = 0x02
+SERVICE_WIN32_OWN_PROCESS = 0x10
+SERVICE_AUTO_START = 0x2
+SERVICE_ERROR_NORMAL = 0x1
+DELETE = 0x00010000
+SERVICE_STATUS_START_PENDING = 0x00000002
+SERVICE_STATUS_RUNNING = 0x00000004
+SERVICE_ACCEPT_STOP = 0x1
+
+# Name used when installing, uninstalling or starting the service.
+SVCNAME = 'youtubedl_builder'
+
+# Wide-character string pointer; this module calls the ...W API variants.
+LPTSTR = ctypes.c_wchar_p
+# Callback type handed to the service dispatcher: returns nothing and takes
+# an int plus a pointer to LPTSTR (argc/argv_raw in win_service_main).
+START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))
+
+
+class SERVICE_TABLE_ENTRY(ctypes.Structure):
+    """One row of the table passed to StartServiceCtrlDispatcherW."""
+
+    _fields_ = [
+        ('lpServiceName', LPTSTR),          # name of the service
+        ('lpServiceProc', START_CALLBACK)   # entry point called for it
+    ]
+
+
+# Callback type wrapped around win_service_handler before it is registered
+# with RegisterServiceCtrlHandlerExW.
+HandlerEx = ctypes.WINFUNCTYPE(
+    ctypes.c_int,     # return
+    ctypes.c_int,     # dwControl
+    ctypes.c_int,     # dwEventType
+    ctypes.c_void_p,  # lpEventData,
+    ctypes.c_void_p,  # lpContext,
+)
+
+
+def _ctypes_array(c_type, py_array):
+    """Build a ctypes array of c_type from the sequence py_array."""
+    array_type = c_type * len(py_array)
+    return array_type(*py_array)
+
+
+def win_OpenSCManager():
+    """Open the service control manager with SC_MANAGER_ALL_ACCESS.
+
+    Returns the handle from OpenSCManagerW and raises Exception when the
+    call yields a null handle.
+    """
+    manager = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
+    if manager:
+        return manager
+    raise Exception('Opening service manager failed - '
+                    'are you running this as administrator?')
+
+
+def win_install_service(service_name, cmdline):
+    """Register cmdline as an auto-start, own-process Windows service.
+
+    service_name -- name of the service to create
+    cmdline      -- command line the service runs
+
+    Raises OSError when CreateServiceW fails. The manager handle is closed
+    in every case.
+    """
+    manager = win_OpenSCManager()
+    try:
+        service = advapi32.CreateServiceW(
+            manager, service_name, None,
+            SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
+            SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
+            cmdline, None, None, None, None, None)
+        if service:
+            # Only the registration matters; the handle is not needed.
+            advapi32.CloseServiceHandle(service)
+        else:
+            raise OSError('Service creation failed: %s' % ctypes.FormatError())
+    finally:
+        advapi32.CloseServiceHandle(manager)
+
+
+def win_uninstall_service(service_name):
+    """Delete the Windows service called service_name.
+
+    Raises OSError when the service cannot be opened or deleted. Both the
+    service handle and the manager handle are closed in every case.
+    """
+    manager = win_OpenSCManager()
+    try:
+        service = advapi32.OpenServiceW(manager, service_name, DELETE)
+        if not service:
+            raise OSError('Could not find service %s: %s' % (
+                service_name, ctypes.FormatError()))
+
+        try:
+            deleted = advapi32.DeleteService(service)
+            if not deleted:
+                raise OSError('Deletion failed: %s' % ctypes.FormatError())
+        finally:
+            advapi32.CloseServiceHandle(service)
+    finally:
+        advapi32.CloseServiceHandle(manager)
+
+
+def win_service_report_event(service_name, msg, is_error=True):
+    """Append msg to the debug log file and to the Windows event log.
+
+    service_name -- event source name passed to RegisterEventSourceW
+    msg          -- text to record
+    is_error     -- use the error type/id (default) instead of the
+                    informational ones
+
+    Raises OSError when the event source cannot be registered or the event
+    cannot be written.
+    """
+    # The file log is only a debugging aid. This function is called from
+    # ``except`` blocks, so a missing directory or unwritable file must not
+    # replace the error being reported with an unrelated one.
+    try:
+        with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
+            f.write(msg + '\n')
+    except OSError:
+        pass
+
+    event_log = advapi32.RegisterEventSourceW(None, service_name)
+    if not event_log:
+        raise OSError('Could not report event: %s' % ctypes.FormatError())
+
+    try:
+        type_id = 0x0001 if is_error else 0x0004
+        event_id = 0xc0000000 if is_error else 0x40000000
+        lines = _ctypes_array(LPTSTR, [msg])
+
+        if not advapi32.ReportEventW(
+                event_log, type_id, 0, event_id, None, len(lines), 0,
+                lines, None):
+            raise OSError('Event reporting failed: %s' % ctypes.FormatError())
+    finally:
+        advapi32.DeregisterEventSource(event_log)
+
+
+def win_service_handler(stop_event, *args):
+    """Control handler stub for the Windows service (not implemented yet).
+
+    stop_event -- threading.Event created by win_service_main
+    *args      -- the values passed to the HandlerEx callback
+
+    For now every call is reported to the event log together with its
+    arguments, and the ValueError is re-raised.
+    """
+    try:
+        # TODO: act on the control code (e.g. set stop_event) instead of
+        # failing unconditionally.
+        raise ValueError('Handler called with args ' + repr(args))
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        # No service name is passed to the handler, so report under the
+        # module-level SVCNAME.
+        win_service_report_event(SVCNAME, msg, is_error=True)
+        raise
+
+
+class SERVICE_STATUS(ctypes.Structure):
+    """ctypes layout of the Win32 SERVICE_STATUS structure (seven DWORDs)."""
+
+    _fields_ = [
+        ('dwServiceType', ctypes.c_ulong),
+        ('dwCurrentState', ctypes.c_ulong),
+        ('dwControlsAccepted', ctypes.c_ulong),
+        ('dwWin32ExitCode', ctypes.c_ulong),
+        ('dwServiceSpecificExitCode', ctypes.c_ulong),
+        ('dwCheckPoint', ctypes.c_ulong),
+        ('dwWaitHint', ctypes.c_ulong),
+    ]
+
+
+def win_service_set_status(handle, status_code):
+    """Report status_code as the current state of the service.
+
+    handle      -- status handle passed on to SetServiceStatus
+    status_code -- new state, e.g. SERVICE_STATUS_RUNNING
+
+    Raises OSError when SetServiceStatus fails.
+    """
+    svcStatus = SERVICE_STATUS()
+    svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
+    svcStatus.dwCurrentState = status_code
+    svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP
+
+    svcStatus.dwServiceSpecificExitCode = 0
+
+    if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
+        raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())
+
+
+def win_service_main(service_name, real_main, argc, argv_raw):
+    """Service entry point invoked through START_CALLBACK (work in progress).
+
+    service_name -- name the control handler is registered under
+    real_main    -- callable handed over by win_service_start; not used yet
+    argc         -- first callback argument; not used yet
+    argv_raw     -- second callback argument; not used yet
+
+    Any exception is written to the event log and re-raised.
+    """
+    try:
+        stop_event = threading.Event()
+        # partial() takes the callable first; stop_event then becomes the
+        # first positional argument of win_service_handler.
+        handler = HandlerEx(functools.partial(win_service_handler, stop_event))
+        h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
+        if not h:
+            raise OSError('Handler registration failed: %s' %
+                          ctypes.FormatError())
+
+        # TODO: report the running state, call real_main and wait for
+        # stop_event. Until then fail with an explicit message.
+        raise NotImplementedError('Service main loop is not implemented')
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        win_service_report_event(service_name, msg, is_error=True)
+        raise
+
+
+def win_service_start(service_name, real_main):
+    """Hand control to the Windows service dispatcher.
+
+    service_name -- name of the service entry in the dispatch table
+    real_main    -- callable forwarded to win_service_main
+
+    Any exception is written to the event log and re-raised.
+    """
+    try:
+        # Bind the Python-side arguments; the callback itself only receives
+        # the two START_CALLBACK arguments. Keeping ``cb`` in a local keeps
+        # the ctypes callback object referenced while the dispatcher runs.
+        cb = START_CALLBACK(
+            functools.partial(win_service_main, service_name, real_main))
+        dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
+            SERVICE_TABLE_ENTRY(
+                service_name,
+                cb
+            ),
+            # Entry with a null name and null callback ends the table.
+            SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
+        ])
+
+        if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
+            raise OSError('ctypes start failed: %s' % ctypes.FormatError())
+    except Exception as e:
+        tb = traceback.format_exc()
+        msg = str(e) + '\n' + tb
+        win_service_report_event(service_name, msg, is_error=True)
+        raise
+
+
+def main(args=None):
+    """Parse the command line and install, remove or run the build server.
+
+    args -- argument list for argparse; None makes it read sys.argv
+
+    Without an action flag an HTTP server is started on the --bind address
+    and serves requests until ENTER is pressed on the console.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--install',
+                        action='store_const', dest='action', const='install',
+                        help='Launch at Windows startup')
+    parser.add_argument('-u', '--uninstall',
+                        action='store_const', dest='action', const='uninstall',
+                        help='Remove Windows service')
+    parser.add_argument('-s', '--service',
+                        action='store_const', dest='action', const='service',
+                        help='Run as a Windows service')
+    # argparse expands %(default)s; optparse's %default breaks the
+    # formatting of the help text.
+    parser.add_argument('-b', '--bind', metavar='<host:port>',
+                        action='store', default='localhost:8142',
+                        help='Bind to host:port (default %(default)s)')
+    options = parser.parse_args(args=args)
+
+    if options.action == 'install':
+        # The service must not depend on the mapped drive letter, so the
+        # script path is rewritten to the \\vboxsrv share.
+        fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
+        cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
+        win_install_service(SVCNAME, cmdline)
+        return
+
+    if options.action == 'uninstall':
+        win_uninstall_service(SVCNAME)
+        return
+
+    if options.action == 'service':
+        win_service_start(SVCNAME, main)
+        return
+
+    host, port_str = options.bind.split(':')
+    port = int(port_str)
+
+    print('Listening on %s:%d' % (host, port))
+    srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
+    # Serve in a background thread so the console can wait for ENTER.
+    thr = threading.Thread(target=srv.serve_forever)
+    thr.start()
+    input('Press ENTER to shut down')
+    srv.shutdown()
+    thr.join()
+
+
+def rmtree(path):
+    """Recursively delete the directory path and everything inside it."""
+    for entry in os.listdir(path):
+        child = os.path.join(path, entry)
+        if not os.path.isdir(child):
+            # Make the file writable first; presumably so read-only files
+            # can be removed on Windows.
+            os.chmod(child, 0o666)
+            os.remove(child)
+            continue
+        rmtree(child)
+    os.rmdir(path)
+
+#==============================================================================
+
+class BuildError(Exception):
+    """Failure of a build step, carrying the HTTP status code to answer with.
+
+    output -- error text, or the raw bytes captured from a subprocess
+    code   -- HTTP status code for the response (default 500)
+    """
+
+    def __init__(self, output, code=500):
+        super(BuildError, self).__init__(output)
+        self.output = output
+        self.code = code
+
+    def __str__(self):
+        # subprocess output arrives as bytes, but __str__ has to return
+        # text, otherwise str(error) raises TypeError.
+        if isinstance(self.output, bytes):
+            return self.output.decode('utf-8', 'replace')
+        return self.output
+
+
+class HTTPError(BuildError):
+    """BuildError raised with an explicit HTTP status code (e.g. 401, 404)."""
+    pass
+
+
+class PythonBuilder(object):
+    """Mixin that looks up the install path of the requested Python."""
+
+    def __init__(self, **kwargs):
+        # 'python' is removed from kwargs before they travel up the chain.
+        version = kwargs.pop('python', '2.7')
+        try:
+            key_path = r'SOFTWARE\Python\PythonCore\%s\InstallPath' % version
+            key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, key_path)
+            try:
+                self.pythonPath, _ = _winreg.QueryValueEx(key, '')
+            finally:
+                _winreg.CloseKey(key)
+        except Exception:
+            raise BuildError('No such Python version: %s' % version)
+
+        super(PythonBuilder, self).__init__(**kwargs)
+
+
+class GITInfoBuilder(object):
+    """Mixin that reads user, repository and revision and makes a work dir."""
+
+    def __init__(self, **kwargs):
+        try:
+            # The first two path components are <user>/<repository>.
+            self.user, self.repoName = kwargs['path'][:2]
+            self.rev = kwargs.pop('rev')
+        except ValueError:
+            raise BuildError('Invalid path')
+        except KeyError as missing:
+            raise BuildError('Missing mandatory parameter "%s"' % missing.args[0])
+
+        archive_dir = os.path.join(
+            os.environ['APPDATA'], 'Build archive', self.repoName, self.user)
+        if not os.path.exists(archive_dir):
+            os.makedirs(archive_dir)
+        # Every instance gets a fresh scratch directory below the archive.
+        self.basePath = tempfile.mkdtemp(dir=archive_dir)
+        self.buildPath = os.path.join(self.basePath, 'build')
+
+        super(GITInfoBuilder, self).__init__(**kwargs)
+
+
+class GITBuilder(GITInfoBuilder):
+    """Build step that clones the repository and checks out the revision."""
+
+    def build(self):
+        repo_url = 'git://github.com/%s/%s.git' % (self.user, self.repoName)
+        try:
+            subprocess.check_output(['git', 'clone', repo_url, self.buildPath])
+            subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath)
+        except subprocess.CalledProcessError as err:
+            # Hand git's captured output to the client as the build error.
+            raise BuildError(err.output)
+
+        super(GITBuilder, self).build()
+
+
+class YoutubeDLBuilder(object):
+    """Build step that runs py2exe for youtube-dl forks of known users."""
+
+    authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']
+
+    def __init__(self, **kwargs):
+        # repoName and user are set earlier in the chain (GITInfoBuilder).
+        if self.repoName != 'youtube-dl':
+            raise BuildError('Invalid repository "%s"' % self.repoName)
+        if not (self.user in self.authorizedUsers):
+            raise HTTPError('Unauthorized user "%s"' % self.user, 401)
+
+        super(YoutubeDLBuilder, self).__init__(**kwargs)
+
+    def build(self):
+        interpreter = os.path.join(self.pythonPath, 'python.exe')
+        try:
+            subprocess.check_output([interpreter, 'setup.py', 'py2exe'],
+                                    cwd=self.buildPath)
+        except subprocess.CalledProcessError as err:
+            raise BuildError(err.output)
+
+        super(YoutubeDLBuilder, self).build()
+
+
+class DownloadBuilder(object):
+    """Build step that streams one file of the build tree to the client."""
+
+    def __init__(self, **kwargs):
+        self.handler = kwargs.pop('handler')
+        # Everything after <user>/<repository> names the file to send.
+        self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
+        self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
+        # Compare whole path components: a bare startswith() would also
+        # accept siblings of the build directory such as "<buildPath>-x".
+        root = os.path.abspath(self.buildPath)
+        if self.srcPath != root and not self.srcPath.startswith(root + os.sep):
+            raise HTTPError(self.srcPath, 401)
+
+        super(DownloadBuilder, self).__init__(**kwargs)
+
+    def build(self):
+        if not os.path.exists(self.srcPath):
+            raise HTTPError('No such file', 404)
+        if os.path.isdir(self.srcPath):
+            raise HTTPError('Is a directory: %s' % self.srcPath, 401)
+
+        self.handler.send_response(200)
+        self.handler.send_header('Content-Type', 'application/octet-stream')
+        self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1])
+        self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size))
+        self.handler.end_headers()
+
+        with open(self.srcPath, 'rb') as src:
+            shutil.copyfileobj(src, self.handler.wfile)
+
+        super(DownloadBuilder, self).build()
+
+
+class CleanupTempDir(object):
+    """Build step that removes the scratch directory of the build."""
+
+    def build(self):
+        # Best effort: a failed cleanup is only printed as a warning.
+        try:
+            rmtree(self.basePath)
+        except Exception as err:
+            print('WARNING deleting "%s": %s' % (self.basePath, err))
+
+        super(CleanupTempDir, self).build()
+
+
+class Null(object):
+    """End of the builder chain: accepts any keyword arguments, does nothing.
+
+    Provides no-op start/close/build so the super() calls of the other
+    builder classes always have a method to reach.
+    """
+
+    def __init__(self, **kwargs):
+        pass
+
+    def start(self):
+        pass
+
+    def close(self):
+        pass
+
+    def build(self):
+        pass
+
+
+# Cooperative chain: every __init__/build defined by the bases calls super(),
+# so the steps run in the order the bases are listed and end in Null.
+class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null):
+    pass
+
+
+class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
+    """Handle GET /<action>/<user>/<repository>/<file...>?<parameters>.
+
+    The action selects a builder class from actionDict. The remaining path
+    components and the first value of every query parameter are passed to
+    the builder as keyword arguments.
+    """
+
+    actionDict = { 'build': Builder, 'download': Builder } # They're the same, no more caching.
+
+    def _send_status(self, code, message):
+        """Send a response that consists only of a status line."""
+        self.send_response(code, message)
+        # Without end_headers() the buffered status line is never written.
+        self.end_headers()
+
+    def do_GET(self):
+        """Run the requested builder and report failures to the client."""
+        url = urlparse.urlparse(self.path)
+        paramDict = {key: value[0] for key, value in urlparse.parse_qs(url.query).items()}
+        action, _, path = url.path.strip('/').partition('/')
+        if not path:
+            self._send_status(500, 'Malformed URL')
+            return
+        if action not in self.actionDict:
+            self._send_status(500, 'Unknown build method "%s"' % action)
+            return
+
+        try:
+            builder = self.actionDict[action](path=path.split('/'), handler=self, **paramDict)
+            builder.start()
+            try:
+                builder.build()
+            finally:
+                builder.close()
+        except HTTPError as e:
+            # HTTPError subclasses BuildError, so it has to be caught first.
+            self._send_status(e.code, str(e))
+        except BuildError as e:
+            msg = str(e).encode('UTF-8')
+            self.send_response(e.code)
+            self.send_header('Content-Type', 'text/plain; charset=UTF-8')
+            self.send_header('Content-Length', str(len(msg)))
+            self.end_headers()
+            self.wfile.write(msg)
+
+#==============================================================================
+
+# Run the command-line interface only when executed as a script.
+if __name__ == '__main__':
+    main()
index 62c68a6cf46d0038c67b04ee3e407a029dd92cb5..796468b4b3aee3e603ddb919535bfde281cd71e5 100755 (executable)
@@ -55,8 +55,8 @@ git push origin "$version"
 /bin/echo -e "\n### OK, now it is time to build the binaries..."
 REV=$(git rev-parse HEAD)
 make youtube-dl youtube-dl.tar.gz
-wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
-       wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
+read -p "VM running? (y/n) " -n 1
+wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
 mkdir -p "build/$version"
 mv youtube-dl youtube-dl.exe "build/$version"
 mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
index b390c7e2ead61db13197020327d082dbae30f0ba..66019ee55aa3ecd22b33cd7cf8760dcc3e4fe2aa 100644 (file)
@@ -24,8 +24,8 @@ tests = [
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
      "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
     # 85 - vflkuzxcs 2013/09/11
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
-     "T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"),
+    ('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[',
+     '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'),
     # 84 - vflg0g8PQ 2013/08/29 (sporadic)
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
      ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
index 99fc7bd28c46393513795de17ac7eee76cb4b9ba..ff1c86efebe31f2d6477cbfe6246baa50607d6a7 100644 (file)
@@ -36,6 +36,7 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
         self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
         self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
+        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
 
     def test_youtube_channel_matching(self):
         assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
index bcd9f79f659d4cc76737bf6c587d2e82dcf9079d..83c65d57e60870be10b25c116ebad6fd20cc8122 100644 (file)
@@ -40,6 +40,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
     def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 5)
@@ -54,6 +55,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
         self.assertTrue(len(subtitles.keys()) == 0)
     def test_nosubtitles(self):
         self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
+        self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles), 0)
index 4a2e00b01a5a368a8ef2b14ecae231e53491935c..d079a4f23217d3a9216048a37e62f83b999e7763 100644 (file)
@@ -8,7 +8,7 @@ import json
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE
+from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE
 from youtube_dl.utils import *
 
 from helper import FakeYDL
@@ -42,5 +42,13 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], u'5124905')
         self.assertTrue(len(result['entries']) >= 11)
 
+    def test_soundcloud_user(self):
+        """A SoundCloud user URL yields a playlist with at least 12 entries."""
+        dl = FakeYDL()
+        ie = SoundcloudUserIE(dl)
+        result = ie.extract('https://soundcloud.com/the-concept-band')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'9615865')
+        self.assertTrue(len(result['entries']) >= 12)
+
 if __name__ == '__main__':
     unittest.main()
index 5632871aca37eac4478be6efbdd2d39bbe19daba..168e6c66cbf089d72622ab6cc20f2c66d19a8c2b 100644 (file)
@@ -41,6 +41,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
     def test_youtube_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 13)
@@ -66,6 +67,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
         self.assertTrue(subtitles['it'] is not None)
     def test_youtube_nosubtitles(self):
         self.url = 'sAjKT8FhjI8'
+        self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles), 0)
index c2f992b8e00f333f59da38ffd59153183d057281..de2b133e020e59d0663ea549a5e8b2ce715a7ef0 100644 (file)
@@ -74,6 +74,7 @@ class YoutubeDL(object):
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatic subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
+                       (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
     subtitleslangs:    List of languages of the subtitles to download
@@ -141,14 +142,10 @@ class YoutubeDL(object):
 
     def to_screen(self, message, skip_eol=False):
         """Print message to stdout if not in quiet mode."""
-        assert type(message) == type(u'')
         if not self.params.get('quiet', False):
             terminator = [u'\n', u''][skip_eol]
             output = message + terminator
-            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
-                output = output.encode(preferredencoding(), 'ignore')
-            self._screen_file.write(output)
-            self._screen_file.flush()
+            write_string(output, self._screen_file)
 
     def to_stderr(self, message):
         """Print message to stderr."""
@@ -499,8 +496,7 @@ class YoutubeDL(object):
                 return
 
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
-                                       self.params.get('writeautomaticsub'),
-                                       self.params.get('allsubtitles', False)])
+                                       self.params.get('writeautomaticsub')])
 
         if  subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
             # subtitles download errors are already managed as troubles in relevant IE
index 696e54f49c21478c1ae70dd22ace6e73c5f79246..df4feefe743b1752f17005cde21decce3ab8523e 100644 (file)
@@ -30,6 +30,7 @@ __authors__  = (
     'Pierre Rudloff',
     'Huarong Huo',
     'Ismael Mejía',
+    'Steffan \'Ruirize\' James',
 )
 
 __license__ = 'Public Domain'
@@ -149,7 +150,7 @@ def parseOpts(overrideArguments=None):
     general.add_option('-U', '--update',
             action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
     general.add_option('-i', '--ignore-errors',
-            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
+            action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
     general.add_option('--dump-user-agent',
             action='store_true', dest='dump_user_agent',
             help='display the current browser identification', default=False)
@@ -354,7 +355,7 @@ def parseOpts(overrideArguments=None):
     if overrideArguments is not None:
         opts, args = parser.parse_args(overrideArguments)
         if opts.verbose:
-            sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
+            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
     else:
         xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
         if xdg_config_home:
@@ -367,9 +368,9 @@ def parseOpts(overrideArguments=None):
         argv = systemConf + userConf + commandLineConf
         opts, args = parser.parse_args(argv)
         if opts.verbose:
-            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
-            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
-            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
 
     return parser, opts, args
 
@@ -392,7 +393,7 @@ def _real_main(argv=None):
         except (IOError, OSError) as err:
             if opts.verbose:
                 traceback.print_exc()
-            sys.stderr.write(u'ERROR: unable to open cookie file\n')
+            write_string(u'ERROR: unable to open cookie file\n')
             sys.exit(101)
     # Set user agent
     if opts.user_agent is not None:
@@ -419,7 +420,7 @@ def _real_main(argv=None):
             batchurls = [x.strip() for x in batchurls]
             batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
             if opts.verbose:
-                sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
         except IOError:
             sys.exit(u'ERROR: batch file could not be read')
     all_urls = batchurls + args
@@ -533,6 +534,11 @@ def _real_main(argv=None):
     else:
         date = DateRange(opts.dateafter, opts.datebefore)
 
+    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+    # this was the old behaviour if only --all-sub was given.
+    if opts.allsubtitles and (opts.writeautomaticsub == False):
+        opts.writesubtitles = True
+
     if sys.version_info < (3,):
         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
         if opts.outtmpl is not None:
@@ -606,7 +612,7 @@ def _real_main(argv=None):
         })
 
     if opts.verbose:
-        sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n')
+        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
         try:
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],
@@ -615,14 +621,14 @@ def _real_main(argv=None):
             out, err = sp.communicate()
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
-                sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n')
+                write_string(u'[debug] Git HEAD: ' + out + u'\n')
         except:
             try:
                 sys.exc_clear()
             except:
                 pass
-        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
-        sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
+        write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
+        write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
 
     ydl.add_default_info_extractors()
 
index 25a8e3cf58927f730f3a2ac6a53672fad2739678..726c9fa15961ee893ff0ec1775c5b33999755660 100644 (file)
@@ -6,6 +6,7 @@ from .arte import ArteTvIE
 from .auengine import AUEngineIE
 from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
+from .bloomberg import BloombergIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
 from .c56 import C56IE
@@ -71,6 +72,7 @@ from .myvideo import MyVideoIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
+from .newgrounds import NewgroundsIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
@@ -86,7 +88,8 @@ from .sina import SinaIE
 from .slashdot import SlashdotIE
 from .slideshare import SlideshareIE
 from .sohu import SohuIE
-from .soundcloud import SoundcloudIE, SoundcloudSetIE
+from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
+from .southparkstudios import SouthParkStudiosIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
@@ -106,6 +109,7 @@ from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vevo import VevoIE
+from .vice import ViceIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
index 7efd1d82324c5397bb6d6f10e1bfa993a2531584..61ce4469a05dd3cdf9bddbecf8c82119c40b5c3f 100644 (file)
@@ -46,6 +46,8 @@ class ArchiveOrgIE(InfoExtractor):
             for fn,fdata in data['files'].items()
             if 'Video' in fdata['format']]
         formats.sort(key=lambda fdata: fdata['file_size'])
+        for f in formats:
+            f['ext'] = determine_ext(f['url'])
 
         info = {
             '_type': 'video',
@@ -61,7 +63,6 @@ class ArchiveOrgIE(InfoExtractor):
             info['thumbnail'] = thumbnail
 
         # TODO: Remove when #980 has been merged
-        info['url'] = formats[-1]['url']
-        info['ext'] = determine_ext(formats[-1]['url'])
+        info.update(formats[-1])
 
-        return info
\ No newline at end of file
+        return info
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
new file mode 100644 (file)
index 0000000..3666a78
--- /dev/null
@@ -0,0 +1,27 @@
+import re
+
+from .common import InfoExtractor
+
+
+class BloombergIE(InfoExtractor):
+    """Extractor for bloomberg.com video pages; hands over to Ooyala."""
+
+    # The dot before "html" is escaped so that only a literal ".html"
+    # ends the name group.
+    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
+
+    _TEST = {
+        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+        u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
+        u'info_dict': {
+            u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
+            u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
+        },
+        u'params': {
+            # Requires ffmpeg (m3u8 manifest)
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return a url_result pointing at the page's og:video URL."""
+        mobj = re.match(self._VALID_URL, url)
+        name = mobj.group('name')
+        webpage = self._download_webpage(url, name)
+        ooyala_url = self._og_search_video_url(webpage)
+        return self.url_result(ooyala_url, ie='Ooyala')
index 64b4658053cd98d0313071dccc05548384098ae7..765cb1f377df132ee91deac5872877777187cd6a 100644 (file)
@@ -54,6 +54,7 @@ class DreiSatIE(InfoExtractor):
             'width': int(fe.find('./width').text),
             'height': int(fe.find('./height').text),
             'url': fe.find('./url').text,
+            'ext': determine_ext(fe.find('./url').text),
             'filesize': int(fe.find('./filesize').text),
             'video_bitrate': int(fe.find('./videoBitrate').text),
             '3sat_qualityname': fe.find('./quality').text,
@@ -79,7 +80,6 @@ class DreiSatIE(InfoExtractor):
         }
 
         # TODO: Remove when #980 has been merged
-        info['url'] = formats[-1]['url']
-        info['ext'] = determine_ext(formats[-1]['url'])
+        info.update(formats[-1])
 
-        return info
\ No newline at end of file
+        return info
index f2b12c884f7a2418b40de4125edd04478bacb9b9..b8fe82e474ba7820a63d2c01ffc34700c349a984 100644 (file)
@@ -34,17 +34,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
     IE_NAME = u'pluzz.francetv.fr'
     _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
 
-    _TEST = {
-        u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html',
-        u'file': u'88439064.mp4',
-        u'info_dict': {
-            u'title': u'Allô Rufo',
-            u'description': u'md5:d909f1ebdf963814b65772aea250400e',
-        },
-        u'params': {
-            u'skip_download': True,
-        },
-    }
+    # Can't use tests, videos expire in 7 days
 
     def _real_extract(self, url):
         title = re.match(self._VALID_URL, url).group(1)
index f1cd889834dc712d8b3c38478f85e30f2f92e44f..8895ad2897f8abd30471ba3f7ef07963c44e7b57 100644 (file)
@@ -40,7 +40,8 @@ class GooglePlusIE(InfoExtractor):
         self.report_extraction(video_id)
 
         # Extract update date
-        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
+        upload_date = self._html_search_regex(
+            ['title="Timestamp">(.*?)</a>', r'<a.+?class="g-M.+?>(.+?)</a>'],
             webpage, u'upload date', fatal=False)
         if upload_date:
             # Convert timestring to a format suitable for filename
index ccca1d7e0bb41dae5694c2bd582728cc939b87da..3798118a7fc491f9b2437878cf9d99df1f05b5ec 100644 (file)
@@ -7,11 +7,11 @@ from .common import InfoExtractor
 class HotNewHipHopIE(InfoExtractor):
     _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html'
     _TEST = {
-        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'",
+        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html",
         u'file': u'1435540.mp3',
         u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
         u'info_dict': {
-            u"title": u"Freddie Gibbs Songs - Lay It Down"
+            u"title": u"Freddie Gibbs - Lay It Down"
         }
     }
 
index 8245b5583839c90fc5580785010b9907b1a12088..a200dcd74a5a7af220cedea02a60c01cfd643e79 100644 (file)
@@ -5,34 +5,27 @@ import socket
 from .common import InfoExtractor
 from ..utils import (
     compat_http_client,
-    compat_str,
     compat_urllib_error,
     compat_urllib_request,
-
-    ExtractorError,
+    unified_strdate,
 )
 
 
 class MixcloudIE(InfoExtractor):
-    _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
     IE_NAME = u'mixcloud'
 
-    def report_download_json(self, file_id):
-        """Report JSON download."""
-        self.to_screen(u'Downloading json')
-
-    def get_urls(self, jsonData, fmt, bitrate='best'):
-        """Get urls from 'audio_formats' section in json"""
-        try:
-            bitrate_list = jsonData[fmt]
-            if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
-                bitrate = max(bitrate_list) # select highest
-
-            url_list = jsonData[fmt][bitrate]
-        except TypeError: # we have no bitrate info.
-            url_list = jsonData[fmt]
-        return url_list
+    _TEST = {
+        u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
+        u'file': u'dholbach-cryptkeeper.mp3',
+        u'info_dict': {
+            u'title': u'Cryptkeeper',
+            u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
+            u'uploader': u'Daniel Holbach',
+            u'uploader_id': u'dholbach',
+            u'upload_date': u'20111115',
+        },
+    }
 
     def check_urls(self, url_list):
         """Returns 1st active url from list"""
@@ -45,71 +38,32 @@ class MixcloudIE(InfoExtractor):
 
         return None
 
-    def _print_formats(self, formats):
-        print('Available formats:')
-        for fmt in formats.keys():
-            for b in formats[fmt]:
-                try:
-                    ext = formats[fmt][b][0]
-                    print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
-                except TypeError: # we have no bitrate info
-                    ext = formats[fmt][0]
-                    print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
-                    break
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
-        # extract uploader & filename from url
-        uploader = mobj.group(1).decode('utf-8')
-        file_id = uploader + "-" + mobj.group(2).decode('utf-8')
-
-        # construct API request
-        file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
-        # retrieve .json file with links to files
-        request = compat_urllib_request.Request(file_url)
-        try:
-            self.report_download_json(file_url)
-            jsonData = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))
-
-        # parse JSON
-        json_data = json.loads(jsonData)
-        player_url = json_data['player_swf_url']
-        formats = dict(json_data['audio_formats'])
-
-        req_format = self._downloader.params.get('format', None)
-
-        if self._downloader.params.get('listformats', None):
-            self._print_formats(formats)
-            return
-
-        if req_format is None or req_format == 'best':
-            for format_param in formats.keys():
-                url_list = self.get_urls(formats, format_param)
-                # check urls
-                file_url = self.check_urls(url_list)
-                if file_url is not None:
-                    break # got it!
-        else:
-            if req_format not in formats:
-                raise ExtractorError(u'Format is not available')
-
-            url_list = self.get_urls(formats, req_format)
-            file_url = self.check_urls(url_list)
-            format_param = req_format
 
-        return [{
-            'id': file_id.decode('utf-8'),
-            'url': file_url.decode('utf-8'),
-            'uploader': uploader.decode('utf-8'),
-            'upload_date': None,
-            'title': json_data['name'],
-            'ext': file_url.split('.')[-1].decode('utf-8'),
-            'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
-            'thumbnail': json_data['thumbnail_url'],
-            'description': json_data['description'],
-            'player_url': player_url.decode('utf-8'),
-        }]
+        uploader = mobj.group(1)
+        cloudcast_name = mobj.group(2)
+        track_id = '-'.join((uploader, cloudcast_name))
+        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
+        webpage = self._download_webpage(url, track_id)
+        json_data = self._download_webpage(api_url, track_id,
+            u'Downloading cloudcast info')
+        info = json.loads(json_data)
+
+        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
+        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
+        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
+        final_song_url = self.check_urls(template_url % i for i in range(30))
+
+        return {
+            'id': track_id,
+            'title': info['name'],
+            'url': final_song_url,
+            'ext': 'mp3',
+            'description': info['description'],
+            'thumbnail': info['pictures'].get('extra_large'),
+            'uploader': info['user']['name'],
+            'uploader_id': info['user']['username'],
+            'upload_date': unified_strdate(info['created_time']),
+            'view_count': info['play_count'],
+        }
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py
new file mode 100644 (file)
index 0000000..2ef80bc
--- /dev/null
@@ -0,0 +1,38 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class NewgroundsIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)'
+    _TEST = {
+        u'url': u'http://www.newgrounds.com/audio/listen/549479',
+        u'file': u'549479.mp3',
+        u'md5': u'fe6033d297591288fa1c1f780386f07a',
+        u'info_dict': {
+            u"title": u"B7 - BusMode",
+            u"uploader": u"Burn7",
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        music_id = mobj.group('id')
+        webpage = self._download_webpage(url, music_id)
+        
+        title = self._html_search_regex(r',"name":"([^"]+)",', webpage, u'music title')
+        uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, u'music uploader')
+        
+        music_url_json_string = self._html_search_regex(r'({"url":"[^"]+"),', webpage, u'music url') + '}'
+        music_url_json = json.loads(music_url_json_string)
+        music_url = music_url_json['url']
+
+        return {
+            'id':       music_id,
+            'title':    title,
+            'url':      music_url,
+            'uploader': uploader,
+            'ext':      determine_ext(music_url),
+        }
index b734722d0890c9e4327c405f4400c2aac9b1f5f1..1f7b4d2e7e9fa79ef9f81f71f190f943c35dd3a5 100644 (file)
@@ -18,11 +18,15 @@ class OoyalaIE(InfoExtractor):
         },
     }
 
+    @staticmethod
+    def _url_for_embed_code(embed_code):
+        return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
+
     def _extract_result(self, info, more_info):
         return {'id': info['embedCode'],
                 'ext': 'mp4',
                 'title': unescapeHTML(info['title']),
-                'url': info['url'],
+                'url': info.get('ipad_url') or info['url'],
                 'description': unescapeHTML(more_info['description']),
                 'thumbnail': more_info['promo'],
                 }
@@ -35,7 +39,9 @@ class OoyalaIE(InfoExtractor):
         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                         player, u'mobile player url')
         mobile_player = self._download_webpage(mobile_url, embedCode)
-        videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"')
+        videos_info = self._search_regex(
+            r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
+            mobile_player, u'info').replace('\\"','"')
         videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
         videos_info = json.loads(videos_info)
         videos_more_info =json.loads(videos_more_info)
index 5f3a5540d2775ae1952d31ab86447ed5151e952f..29cd5617c7d1919fa95e0b48e7ff35585106b800 100644 (file)
@@ -1,10 +1,12 @@
 import json
 import re
+import itertools
 
 from .common import InfoExtractor
 from ..utils import (
     compat_str,
     compat_urlparse,
+    compat_urllib_parse,
 
     ExtractorError,
     unified_strdate,
@@ -53,10 +55,11 @@ class SoundcloudIE(InfoExtractor):
     def _resolv_url(cls, url):
         return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
 
-    def _extract_info_dict(self, info, full_title=None):
+    def _extract_info_dict(self, info, full_title=None, quiet=False):
         video_id = info['id']
         name = full_title or video_id
-        self.report_extraction(name)
+        if quiet == False:
+            self.report_extraction(name)
 
         thumbnail = info['artwork_url']
         if thumbnail is not None:
@@ -198,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE):
                 'id': info['id'],
                 'title': info['title'],
                 }
+
+
+class SoundcloudUserIE(SoundcloudIE):
+    _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
+    IE_NAME = u'soundcloud:user'
+
+    # it's in tests/test_playlists.py
+    _TEST = None
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        uploader = mobj.group('user')
+
+        url = 'http://soundcloud.com/%s/' % uploader
+        resolv_url = self._resolv_url(url)
+        user_json = self._download_webpage(resolv_url, uploader,
+            u'Downloading user info')
+        user = json.loads(user_json)
+
+        tracks = []
+        for i in itertools.count():
+            data = compat_urllib_parse.urlencode({'offset': i*50,
+                                                  'client_id': self._CLIENT_ID,
+                                                  })
+            tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
+            response = self._download_webpage(tracks_url, uploader, 
+                u'Downloading tracks page %s' % (i+1))
+            new_tracks = json.loads(response)
+            tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
+            if len(new_tracks) < 50:
+                break
+
+        return {
+            '_type': 'playlist',
+            'id': compat_str(user['id']),
+            'title': user['username'],
+            'entries': tracks,
+        }
diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py
new file mode 100644 (file)
index 0000000..a5dc754
--- /dev/null
@@ -0,0 +1,34 @@
+import re
+
+from .mtv import MTVIE, _media_xml_tag
+
+
+class SouthParkStudiosIE(MTVIE):
+    IE_NAME = u'southparkstudios.com'
+    _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P<id>\d+)'
+
+    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
+
+    _TEST = {
+        u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
+        u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
+        u'info_dict': {
+            u'title': u'Bat Daded',
+            u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
+        },
+    }
+
+    # Overwrite MTVIE properties we don't want
+    _TESTS = []
+
+    def _get_thumbnail_url(self, uri, itemdoc):
+        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+        return itemdoc.find(search_path).attrib['url']
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
+                                  webpage, u'mgid')
+        return self._get_videos_info(mgid)
index 97215f2897d7be30d6bf70e036b2f7b0fe30b3b4..90de7de3a709d4385b29e62d44ae1e82349d883e 100644 (file)
@@ -10,8 +10,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
     @property
     def _have_to_download_any_subtitles(self):
         return any([self._downloader.params.get('writesubtitles', False),
-                    self._downloader.params.get('writeautomaticsub'),
-                    self._downloader.params.get('allsubtitles', False)])
+                    self._downloader.params.get('writeautomaticsub')])
 
     def _list_available_subtitles(self, video_id, webpage=None):
         """ outputs the available subtitles for the video """
@@ -34,7 +33,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
         available_subs_list = {}
         if self._downloader.params.get('writeautomaticsub', False):
             available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
-        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+        if self._downloader.params.get('writesubtitles', False):
             available_subs_list.update(self._get_available_subtitles(video_id))
 
         if not available_subs_list:  # error, it didn't get the available subtitles
index f278951baf0ae38fbbc7c5c405c473a8649e9caf..0bf028f6195ba56be22e059bdc83d23cbabff59b 100644 (file)
@@ -52,6 +52,7 @@ class TriluliluIE(InfoExtractor):
             {
                 'format': fnode.text,
                 'url': video_url_template % fnode.text,
+                'ext': fnode.text.partition('-')[0]
             }
 
             for fnode in format_doc.findall('./formats/format')
@@ -67,7 +68,6 @@ class TriluliluIE(InfoExtractor):
         }
 
         # TODO: Remove when #980 has been merged
-        info['url'] = formats[-1]['url']
-        info['ext'] = formats[-1]['format'].partition('-')[0]
+        info.update(formats[-1])
 
         return info
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
new file mode 100644 (file)
index 0000000..6b93afa
--- /dev/null
@@ -0,0 +1,38 @@
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+from ..utils import ExtractorError
+
+
+class ViceIE(InfoExtractor):
+    _VALID_URL = r'http://www.vice.com/.*?/(?P<name>.+)'
+
+    _TEST = {
+        u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
+        u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
+        u'info_dict': {
+            u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+        },
+        u'params': {
+            # Requires ffmpeg (m3u8 manifest)
+            u'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        name = mobj.group('name')
+        webpage = self._download_webpage(url, name)
+        try:
+            ooyala_url = self._og_search_video_url(webpage)
+        except ExtractorError:
+            try:
+                embed_code = self._search_regex(
+                    r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage,
+                    u'ooyala embed code')
+                ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
+            except ExtractorError:
+                raise ExtractorError(u'The page doesn\'t contain a video', expected=True)
+        return self.url_result(ooyala_url, ie='Ooyala')
+
index 88b8b6be09f7a8f892db8266b3e68df14e22bfe7..361619694980d3260ff81aeed2d0d07294739a0e 100644 (file)
@@ -11,8 +11,8 @@ from ..utils import (
 
 class XHamsterIE(InfoExtractor):
     """Information Extractor for xHamster"""
-    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
-    _TEST = {
+    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+    _TESTS = [{
         u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
         u'file': u'1509445.flv',
         u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa',
@@ -21,13 +21,24 @@ class XHamsterIE(InfoExtractor):
             u"uploader_id": u"Ruseful2011", 
             u"title": u"FemaleAgent Shy beauty takes the bait"
         }
-    }
+    },
+    {
+        u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+        u'file': u'2221348.flv',
+        u'md5': u'e767b9475de189320f691f49c679c4c7',
+        u'info_dict': {
+            u"upload_date": u"20130914", 
+            u"uploader_id": u"jojo747400", 
+            u"title": u"Britney Spears  Sexy Booty"
+        }
+    }]
 
     def _real_extract(self,url):
         mobj = re.match(self._VALID_URL, url)
 
         video_id = mobj.group('id')
-        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
+        seo = mobj.group('seo')
+        mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo)
         webpage = self._download_webpage(mrss_url, video_id)
 
         mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
index f49665925fdeadeb9f3f64f9eafb7c7a12080fee..23a8097c5a91f3409688d26006cf4b26b785139e 100644 (file)
@@ -139,7 +139,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                      (
                          (?:https?://)?                                       # http(s):// (optional)
                          (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
-                            tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
+                            tube\.majestyc\.net/|
+                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:                                                  # the various things that can precede the ID:
                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
@@ -428,7 +429,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         elif len(s) == 86:
             return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
         elif len(s) == 85:
-            return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1]
+            return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
         elif len(s) == 84:
             return s[81:36:-1] + s[0] + s[35:2:-1]
         elif len(s) == 83:
@@ -782,10 +783,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                         if self._downloader.params.get('verbose'):
                             s = url_data['s'][0]
                             if age_gate:
-                                player_version = self._search_regex(r'ad3-(.+?)\.swf',
-                                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
-                                    'flash player', fatal=False)
-                                player = 'flash player %s' % player_version
+                                player = 'flash player'
                             else:
                                 player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                     'html5 player', fatal=False)
@@ -1007,6 +1005,9 @@ class YoutubeUserIE(InfoExtractor):
                 response = json.loads(page)
             except ValueError as err:
                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+            if 'entry' not in response['feed']:
+                # Number of videos is a multiple of self._MAX_RESULTS
+                break
 
             # Extract video identifiers
             ids_in_page = []
index 768c6207df5e32728c95ccc4a1cfa5e05d9088c3..814a9b6be6aabc366d96c23bca5c9c3c1e80f8b6 100644 (file)
@@ -700,7 +700,16 @@ def unified_strdate(date_str):
     date_str = date_str.replace(',',' ')
     # %z (UTC offset) is only supported in python>=3.2
     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
-    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
+    format_expressions = [
+        '%d %B %Y',
+        '%B %d %Y',
+        '%b %d %Y',
+        '%Y-%m-%d',
+        '%d/%m/%Y',
+        '%Y/%m/%d %H:%M:%S',
+        '%d.%m.%Y %H:%M',
+        '%Y-%m-%dT%H:%M:%SZ',
+    ]
     for expression in format_expressions:
         try:
             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -781,6 +790,18 @@ def platform_name():
     return res
 
 
+def write_string(s, out=None):
+    if out is None:
+        out = sys.stderr
+    assert type(s) == type(u'')
+
+    if ('b' in getattr(out, 'mode', '') or
+            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
+        s = s.encode(preferredencoding(), 'ignore')
+    out.write(s)
+    out.flush()
+
+
 def bytes_to_intlist(bs):
     if not bs:
         return []
index 3b2505c77f9026cfc92b50f6bc86b875fc4b70ba..80ccfbd4f2465b0a00e3693a6f588671226f5ff2 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2013.09.12'
+__version__ = '2013.09.17'