Merge pull request #403 from FiloSottile/re_VERBOSE
author Filippo Valsorda <filosottile.wiki@gmail.com>
Sat, 29 Sep 2012 15:02:38 +0000 (17:02 +0200)
committer Filippo Valsorda <filosottile.wiki@gmail.com>
Sat, 29 Sep 2012 15:05:40 +0000 (17:05 +0200)
1  2 
youtube_dl/InfoExtractors.py

index 4fcff77ff7e55ee4ff3c7a4e8fb7d940b72cb716,3875e7fd8c7560e58cf0da7ee619579bfd04d1da..bdb2ec3110321c2628b281d36062ffa1bdb0905f
@@@ -97,7 -95,24 +97,25 @@@ class InfoExtractor(object)
  class YoutubeIE(InfoExtractor):
        """Information extractor for youtube.com."""
  
-       _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|tube\.majestyc\.net/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
+       _VALID_URL = r"""^
+                        (
+                            (?:https?://)?                                       # http(s):// (optional)
 -                           (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/) # the various hostnames, with wildcard subdomains
++                           (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
++                              tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
+                            (?!view_play_list|my_playlists|artist|playlist)      # ignore playlist URLs
+                            (?:                                                  # the various things that can precede the ID:
+                                (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
+                                |(?:                                             # or the v= param in all its forms
+                                    (?:watch(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+                                    (?:\?|\#!?)                                  # the params delimiter ? or # or #!
+                                    (?:.+&)?                                     # any other preceding param (like /?s=tuff&v=xxxx)
+                                    v=
+                                )
+                            )?                                                   # optional -> youtube.com/xxxx is OK
+                        )?                                                       # all until now is optional -> you can pass the naked ID
+                        ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
+                        (?(1).+)?                                                # if we found the ID, everything can follow
+                        $"""
        _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'