Add `--force-generic-extractor`
author Sergey M․ <dstftw@gmail.com>
Fri, 12 Jun 2015 13:20:12 +0000 (19:20 +0600)
committer Sergey M․ <dstftw@gmail.com>
Fri, 12 Jun 2015 13:20:12 +0000 (19:20 +0600)
For some extractors it is hard to work out a good _VALID_URL, so we use very vague and unrestrictive ones,
e.g. just allowing anything after the hostname and capturing part of the URL as the id.
If a page matched by one of these extractors happens to contain a video embed from a different hoster or platform
and this scenario is not handled in the extractor itself, we end up unable to download this embed
until the extractor is fixed to support embeds of this kind.
Forcing the downloader to use the generic extractor can be a neat temporary workaround for this problem.
Example: FiveTV extractor with Tvigle embed - http://www.5-tv.ru/rabota/broadcasts/48/

youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/generic.py
youtube_dl/options.py

index b1f792d4ef8815dc8cfb4471cdd6f0e47a93e8f7..4b801a9177a2a74f342d40fa97c3e925156ecd45 100755 (executable)
@@ -139,6 +139,7 @@ class YoutubeDL(object):
     outtmpl:           Template for output names.
     restrictfilenames: Do not allow "&" and spaces in file names
     ignoreerrors:      Do not stop on download errors.
+    force_generic_extractor: Force downloader to use the generic extractor
     nooverwrites:      Prevent overwriting files.
     playliststart:     Playlist item to start at.
     playlistend:       Playlist item to end at.
@@ -282,6 +283,7 @@ class YoutubeDL(object):
         self._num_downloads = 0
         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
         self._err_file = sys.stderr
+        self._force_generic_extractor_required = params.get('force_generic_extractor', False)
         self.params = params
         self.cache = Cache(self)
 
@@ -633,6 +635,10 @@ class YoutubeDL(object):
         extra_info is a dict containing the extra values to add to each result
         '''
 
+        if not ie_key and self._force_generic_extractor_required:
+            self._force_generic_extractor_required = False
+            ie_key = 'Generic'
+
         if ie_key:
             ies = [self.get_info_extractor(ie_key)]
         else:
index ace17857c8cb28320ba1fab2988e56c020583af7..215b616de710705d3df73d0cb7dbf14d5dc08848 100644 (file)
@@ -293,6 +293,7 @@ def _real_main(argv=None):
         'autonumber_size': opts.autonumber_size,
         'restrictfilenames': opts.restrictfilenames,
         'ignoreerrors': opts.ignoreerrors,
+        'force_generic_extractor': opts.force_generic_extractor,
         'ratelimit': opts.ratelimit,
         'nooverwrites': opts.nooverwrites,
         'retries': opts_retries,
index 40d869c534759453ada54dce010b0705538b25c0..3d672197c2c046a6291eeecbfa3bf27e3fc02c77 100644 (file)
@@ -977,7 +977,9 @@ class GenericIE(InfoExtractor):
                 'upload_date': upload_date,
             }
 
-        if not self._downloader.params.get('test', False) and not is_intentional:
+        if (not self._downloader.params.get('test', False) and
+                not is_intentional and
+                not self._downloader.params.get('force_generic_extractor', False)):
             self._downloader.report_warning('Falling back on generic information extractor.')
 
         if not full_response:
index 689fa75951e2fd390b93fa550807337bd6185aa2..096ab6137460e2298a5a1fa17eb81a36de65be32 100644 (file)
@@ -150,6 +150,10 @@ def parseOpts(overrideArguments=None):
         '--extractor-descriptions',
         action='store_true', dest='list_extractor_descriptions', default=False,
         help='Output descriptions of all supported extractors')
+    general.add_option(
+        '--force-generic-extractor',
+        action='store_true', dest='force_generic_extractor', default=False,
+        help='Force extraction to use the generic extractor')
     general.add_option(
         '--default-search',
         dest='default_search', metavar='PREFIX',