Add experimental support for lazy loading the info extractors
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Wed, 10 Feb 2016 13:01:31 +0000 (14:01 +0100)
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Fri, 8 Apr 2016 19:50:07 +0000 (21:50 +0200)
'make lazy-extractors' creates the youtube_dl/extractor/lazy_extractors.py (imported by youtube_dl/extractor/__init__.py), which contains simplified classes that only have the 'suitable' class method and that load the appropriate class with the '__new__' method when an instance is created.

.gitignore
Makefile
devscripts/lazy_load_template.py [new file with mode: 0644]
devscripts/make_lazy_extractors.py [new file with mode: 0644]
youtube_dl/extractor/__init__.py

index 26dbde73d412673ee9c53ee06a476a803a92edc7..72c10425d675f7c1952061be0057db0c2e5e232d 100644 (file)
@@ -13,6 +13,7 @@ README.txt
 youtube-dl.1
 youtube-dl.bash-completion
 youtube-dl.fish
+youtube_dl/extractor/lazy_extractors.py
 youtube-dl
 youtube-dl.exe
 youtube-dl.tar.gz
index ba7f7ed3663ddd3c7dccf18207f5292b539bdaf1..06cffcb710c6fd8fa6962007bd07d4753d5d5af6 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+       rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
        find . -name "*.pyc" -delete
        find . -name "*.class" -delete
 
@@ -88,6 +88,14 @@ youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
 
 fish-completion: youtube-dl.fish
 
+lazy-extractors: youtube_dl/extractor/lazy_extractors.py
+
+# Every extractor source except the generated module itself. $(shell ...) is
+# used because the != assignment is only understood by GNU make 4.0 and later.
+_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py')
+youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
+       $(PYTHON) devscripts/make_lazy_extractors.py $@
+
 youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
        @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
                --exclude '*.DS_Store' \
diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
new file mode 100644 (file)
index 0000000..ae2bd27
--- /dev/null
@@ -0,0 +1,32 @@
+# flake8: noqa
+from __future__ import unicode_literals
+
+import re
+
+
+class LazyLoadExtractor(object):
+    """Lightweight stand-in for an extractor class that is imported on demand.
+
+    Subclasses set ``_module`` to the name of the module that defines the real
+    extractor class, which must have the same name as the subclass.
+    """
+
+    # Importable name of the module holding the real class; set by subclasses.
+    _module = None
+
+    @classmethod
+    def ie_key(cls):
+        """Return the class name without its last two characters."""
+        return cls.__name__[:-2]
+
+    def __new__(cls, *args, **kwargs):
+        """Import the real extractor class and return an initialized instance.
+
+        The result is not an instance of ``cls``, so Python would not call
+        ``__init__`` on an object obtained from a bare ``real_cls.__new__``.
+        Calling ``real_cls`` directly constructs and initializes it with the
+        caller's arguments.
+        """
+        mod = __import__(cls._module, fromlist=(cls.__name__,))
+        real_cls = getattr(mod, cls.__name__)
+        return real_cls(*args, **kwargs)
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
new file mode 100644 (file)
index 0000000..8627d0b
--- /dev/null
@@ -0,0 +1,83 @@
+from __future__ import unicode_literals, print_function
+
+from inspect import getsource
+import os
+from os.path import dirname as dirn
+import sys
+
+print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
+
+# Make the repository root importable so the youtube_dl package below resolves.
+sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+
+# Remove any previously generated module before importing youtube_dl.extractor:
+# its __init__ prefers lazy_extractors when present, and the real classes are
+# needed here.
+lazy_extractors_filename = sys.argv[1]
+if os.path.exists(lazy_extractors_filename):
+    os.remove(lazy_extractors_filename)
+
+from youtube_dl.extractor import _ALL_CLASSES
+from youtube_dl.extractor.common import InfoExtractor
+
+# Locate the template next to this script rather than relative to the current
+# working directory, matching how sys.path is derived from __file__ above.
+template_filename = os.path.join(
+    dirn(os.path.abspath(__file__)), 'lazy_load_template.py')
+with open(template_filename, 'rt') as f:
+    module_template = f.read()
+
+# The default 'suitable' source is appended right after the template; presumably
+# its method-level indentation makes it part of the class the template ends with.
+module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)]
+
+# Source skeleton emitted for each extractor class.
+ie_template = '''
+class {name}(LazyLoadExtractor):
+    _VALID_URL = {valid_url!r}
+    _module = '{module}'
+'''
+
+# Replacement for _make_valid_url that returns the precomputed value.
+make_valid_template = '''
+    @classmethod
+    def _make_valid_url(cls):
+        return {!r}
+'''
+
+
+def build_lazy_ie(ie, name):
+    """Return the source code of the lazy stand-in class for extractor *ie*.
+
+    The class is called *name*, copies ``_VALID_URL`` and records the module
+    of the real class. An overridden ``suitable`` is copied verbatim, and a
+    ``_make_valid_url`` method, if present, is replaced by one returning its
+    current result.
+    """
+    valid_url = getattr(ie, '_VALID_URL', None)
+    s = ie_template.format(
+        name=name,
+        valid_url=valid_url,
+        module=ie.__module__)
+    if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
+        s += getsource(ie.suitable)
+    if hasattr(ie, '_make_valid_url'):
+        # search extractors
+        s += make_valid_template.format(ie._make_valid_url())
+    return s
+
+names = []
+for ie in _ALL_CLASSES:
+    name = ie.ie_key() + 'IE'
+    src = build_lazy_ie(ie, name)
+    module_contents.append(src)
+    names.append(name)
+
+module_contents.append(
+    '_ALL_CLASSES = [{}]'.format(', '.join(names)))
+
+# End the generated file with a newline.
+module_src = '\n'.join(module_contents) + '\n'
+
+with open(lazy_extractors_filename, 'wt') as f:
+    f.write(module_src)
index a0a53445a5b32563f6ade427d8a3ae0ee74d43aa..b0d4d156b411e29cec6803b181bfacc0a87262b7 100644 (file)
@@ -1,13 +1,17 @@
 from __future__ import unicode_literals
 
-from .extractors import *
-
-_ALL_CLASSES = [
-    klass
-    for name, klass in globals().items()
-    if name.endswith('IE') and name != 'GenericIE'
-]
-_ALL_CLASSES.append(GenericIE)
+try:  # prefer the generated lazy-loading module when it has been built
+    from .lazy_extractors import *
+    from .lazy_extractors import _ALL_CLASSES  # the star-import skips underscore-prefixed names
+except ImportError:  # lazy module not available: import the real extractors eagerly
+    from .extractors import *
+
+    _ALL_CLASSES = [
+        klass
+        for name, klass in globals().items()
+        if name.endswith('IE') and name != 'GenericIE'
+    ]
+    _ALL_CLASSES.append(GenericIE)  # excluded from the list above so that it is appended last
 
 
 def gen_extractor_classes():