Merge pull request #5961 from dstftw/force-generic-extractor

author Sergey M. <dstftw@gmail.com>

Wed, 24 Jun 2015 14:10:45 +0000 (19:10 +0500)

committer Sergey M. <dstftw@gmail.com>

Wed, 24 Jun 2015 14:10:45 +0000 (19:10 +0500)
author Sergey M. <dstftw@gmail.com>
Wed, 24 Jun 2015 14:10:45 +0000 (19:10 +0500)
committer Sergey M. <dstftw@gmail.com>
Wed, 24 Jun 2015 14:10:45 +0000 (19:10 +0500)
diff --combined youtube_dl/YoutubeDL.py

index 6e4b6f56664f67a796b21f10b8d005f2f6e5b68d,a7d3a1c017fb6230639b522c1b36c356cfcaf93e..ef0f71bad45d6057dc99c1ce968629a0e357e57b
--- 1/youtube_dl/YoutubeDL.py
--- 2/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@@ -119,7 -119,7 +119,7 @@@ class YoutubeDL(object)
   
       username:          Username for authentication purposes.
       password:          Password for authentication purposes.
- -    videopassword:     Password for acces a video.
+ +    videopassword:     Password for accessing a video.
       usenetrc:          Use netrc for authentication instead.
       verbose:           Print additional info to stdout.
       quiet:             Do not print messages to stdout.
@@@ -139,6 -139,7 +139,7 @@@
       outtmpl:           Template for output names.
       restrictfilenames: Do not allow "&" and spaces in file names
       ignoreerrors:      Do not stop on download errors.
+     force_generic_extractor: Force downloader to use the generic extractor
       nooverwrites:      Prevent overwriting files.
       playliststart:     Playlist item to start at.
       playlistend:       Playlist item to end at.
@@@ -626,13 -627,16 +627,16 @@@
               info_dict.setdefault(key, value)
   
       def extract_info(self, url, download=True, ie_key=None, extra_info={},
-                      process=True):
+                      process=True, force_generic_extractor=False):
           '''
           Returns a list with a dictionary for each video we find.
           If 'download', also downloads the videos.
           extra_info is a dict containing the extra values to add to each result
           '''
   
+         if not ie_key and force_generic_extractor:
+             ie_key = 'Generic'
+ 
           if ie_key:
               ies = [self.get_info_extractor(ie_key)]
           else:
@@@ -1033,6 -1037,12 +1037,6 @@@
               info_dict['id'], info_dict.get('subtitles'),
               info_dict.get('automatic_captions'))
   
- -        # This extractors handle format selection themselves
- -        if info_dict['extractor'] in ['Youku']:
- -            if download:
- -                self.process_info(info_dict)
- -            return info_dict
- -
           # We now pick which formats have to be downloaded
           if info_dict.get('formats') is None:
               # There's only one format available
@@@ -1493,7 -1503,8 +1497,8 @@@
           for url in url_list:
               try:
                   # It also downloads the videos
-                 res = self.extract_info(url)
+                 res = self.extract_info(
+                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
               except UnavailableVideoError:
                   self.report_error('unable to download video')
               except MaxDownloadsReached:
diff --combined youtube_dl/extractor/generic.py

index 5c03fddc6a2ee548a6617a3aea9ba161f6b3777d,c8582bda97f2b9704c3d53d53490296f0f8df0ac..7769ffc5c5f425ce04dc92147c77803291f0fdd5
--- 1/youtube_dl/extractor/generic.py
--- 2/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@@ -42,10 -42,6 +42,10 @@@ from .udn import UDNEmbedI
   from .senateisvp import SenateISVPIE
   from .bliptv import BlipTVIE
   from .svt import SVTIE
+ +from .pornhub import PornHubIE
+ +from .xhamster import XHamsterEmbedIE
+ +from .vimeo import VimeoIE
+ +from .dailymotion import DailymotionCloudIE
   
   
   class GenericIE(InfoExtractor):
@@@ -336,15 -332,6 +336,15 @@@
                   'skip_download': True,
               },
           },
+ +        # XHamster embed
+ +        {
+ +            'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
+ +            'info_dict': {
+ +                'id': 'showthread',
+ +                'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
+ +            },
+ +            'playlist_mincount': 7,
+ +        },
           # Embedded TED video
           {
               'url': 'http://en.support.wordpress.com/videos/ted-talks/',
@@@ -824,29 -811,6 +824,29 @@@
                   'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
                   'uploader': 'Rogers Sportsnet',
               },
+ +        },
+ +        # Dailymotion Cloud video
+ +        {
+ +            'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
+ +            'md5': '49444254273501a64675a7e68c502681',
+ +            'info_dict': {
+ +                'id': '5585de919473990de4bee11b',
+ +                'ext': 'mp4',
+ +                'title': 'Le débat',
+ +                'thumbnail': 're:^https?://.*\.jpe?g$',
+ +            }
+ +        },
+ +        # AdobeTVVideo embed
+ +        {
+ +            'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
+ +            'md5': '43662b577c018ad707a63766462b1e87',
+ +            'info_dict': {
+ +                'id': '2456',
+ +                'ext': 'mp4',
+ +                'title': 'New experience with Acrobat DC',
+ +                'description': 'New experience with Acrobat DC',
+ +                'duration': 248.667,
+ +            },
           }
       ]
   
@@@ -1014,7 -978,9 +1014,9 @@@
               }
   
           if not self._downloader.params.get('test', False) and not is_intentional:
-             self._downloader.report_warning('Falling back on generic information extractor.')
+             force = self._downloader.params.get('force_generic_extractor', False)
+             self._downloader.report_warning(
+                 '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
   
           if not full_response:
               request = compat_urllib_request.Request(url)
@@@ -1124,9 -1090,18 +1126,9 @@@
           if matches:
               return _playlist_from_matches(matches, ie='RtlNl')
   
- -        # Look for embedded (iframe) Vimeo player
- -        mobj = re.search(
- -            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
- -        if mobj:
- -            player_url = unescapeHTML(mobj.group('url'))
- -            surl = smuggle_url(player_url, {'Referer': url})
- -            return self.url_result(surl)
- -        # Look for embedded (swf embed) Vimeo player
- -        mobj = re.search(
- -            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
- -        if mobj:
- -            return self.url_result(mobj.group(1))
+ +        vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
+ +        if vimeo_url is not None:
+ +            return self.url_result(vimeo_url)
   
           # Look for embedded YouTube player
           matches = re.findall(r'''(?x)
@@@ -1348,16 -1323,6 +1350,16 @@@
           if sportbox_urls:
               return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
   
+ +        # Look for embedded PornHub player
+ +        pornhub_url = PornHubIE._extract_url(webpage)
+ +        if pornhub_url:
+ +            return self.url_result(pornhub_url, 'PornHub')
+ +
+ +        # Look for embedded XHamster player
+ +        xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
+ +        if xhamster_urls:
+ +            return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
+ +
           # Look for embedded Tvigle player
           mobj = re.search(
               r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@@@ -1525,20 -1490,6 +1527,20 @@@
           if senate_isvp_url:
               return self.url_result(senate_isvp_url, 'SenateISVP')
   
+ +        # Look for Dailymotion Cloud videos
+ +        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
+ +        if dmcloud_url:
+ +            return self.url_result(dmcloud_url, 'DailymotionCloud')
+ +
+ +        # Look for AdobeTVVideo embeds
+ +        mobj = re.search(
+ +            r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
+ +            webpage)
+ +        if mobj is not None:
+ +            return self.url_result(
+ +                self._proto_relative_url(unescapeHTML(mobj.group(1))),
+ +                'AdobeTVVideo')
+ +
           def check_video(vurl):
               if YoutubeIE.suitable(vurl):
                   return True
diff --combined youtube_dl/options.py

index 740458e51483f45f8d8474d68edaaac48b24941e,096ab6137460e2298a5a1fa17eb81a36de65be32..6aeca61ee5e099e80a5d98a893afb8adde72e1cd
--- 1/youtube_dl/options.py
--- 2/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@@ -150,6 -150,10 +150,10 @@@ def parseOpts(overrideArguments=None)
           '--extractor-descriptions',
           action='store_true', dest='list_extractor_descriptions', default=False,
           help='Output descriptions of all supported extractors')
+     general.add_option(
+         '--force-generic-extractor',
+         action='store_true', dest='force_generic_extractor', default=False,
+         help='Force extraction to use the generic extractor')
       general.add_option(
           '--default-search',
           dest='default_search', metavar='PREFIX',
@@@ -725,7 -729,7 +729,7 @@@
           metavar='POLICY', dest='fixup', default='detect_or_warn',
           help='Automatically correct known faults of the file. '
                'One of never (do nothing), warn (only emit a warning), '
- -             'detect_or_warn(the default; fix file if we can, warn otherwise)')
+ +             'detect_or_warn (the default; fix file if we can, warn otherwise)')
       postproc.add_option(
           '--prefer-avconv',
           action='store_false', dest='prefer_ffmpeg',
author	Sergey M. <dstftw@gmail.com>
	Wed, 24 Jun 2015 14:10:45 +0000 (19:10 +0500)
committer	Sergey M. <dstftw@gmail.com>
	Wed, 24 Jun 2015 14:10:45 +0000 (19:10 +0500)
		1	2
youtube_dl/YoutubeDL.py	patch |	diff1 |	diff2 |	blob | history
youtube_dl/extractor/generic.py	patch |	diff1 |	diff2 |	blob | history
youtube_dl/options.py	patch |	diff1 |	diff2 |	blob | history