Merge remote-tracking branch 'dstftw/correct-valid-urls'

author Philipp Hagemeister <phihag@phihag.de>
Wed, 4 Dec 2013 18:56:05 +0000 (19:56 +0100)

committer Philipp Hagemeister <phihag@phihag.de>
Wed, 4 Dec 2013 18:56:05 +0000 (19:56 +0100)

diff --git a/README.md b/README.md

index 031e436b637c8f5ee99a92b4850ae014a12607e9..029c418d16e332c73942bf6b60ac6470d8b8429f 100644 (file)
--- a/README.md
+++ b/README.md
@@ -30,7 +30,8 @@ which means you can modify it, redistribute it or use it however you like.
      --list-extractors          List all supported extractors and the URLs they
                                 would handle
      --extractor-descriptions   Output descriptions of all supported extractors
-    --proxy URL                Use the specified HTTP/HTTPS proxy
+    --proxy URL                Use the specified HTTP/HTTPS proxy. Pass in an
+                               empty string (--proxy "") for direct connection
      --no-check-certificate     Suppress HTTPS certificate validation.
      --cache-dir DIR            Location in the filesystem where youtube-dl can
                                 store downloaded information permanently. By
@@ -55,7 +56,7 @@ which means you can modify it, redistribute it or use it however you like.
      --dateafter DATE           download only videos uploaded after this date
      --no-playlist              download only the currently playing video
      --age-limit YEARS          download only videos suitable for the given age
-    --download-archive FILE    Download only videos not present in the archive
+    --download-archive FILE    Download only videos not listed in the archive
                                 file. Record the IDs of all downloaded videos in
                                 it.
  
@@ -183,7 +184,7 @@ which means you can modify it, redistribute it or use it however you like.
  
  # CONFIGURATION
  
-You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
+You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
  
  # OUTPUT TEMPLATE
  
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index 2eeef2ae93ccb3cfac9c1c1ead18f7e58cdcd6a1..d2446b6706a6eb239cf52a00fb775ef0eb9cac9f 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -81,11 +81,11 @@ from .PostProcessor import (
  
  
  def parseOpts(overrideArguments=None):
-    def _readOptions(filename_bytes):
+    def _readOptions(filename_bytes, default=[]):
          try:
              optionf = open(filename_bytes)
          except IOError:
-            return [] # silently skip if file is not present
+            return default  # silently skip if file is not present
          try:
              res = []
              for l in optionf:
@@ -191,7 +191,9 @@ def parseOpts(overrideArguments=None):
      general.add_option('--extractor-descriptions',
              action='store_true', dest='list_extractor_descriptions',
              help='Output descriptions of all supported extractors', default=False)
-    general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
+    general.add_option(
+        '--proxy', dest='proxy', default=None, metavar='URL',
+        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
      general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
      general.add_option(
          '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
@@ -224,7 +226,7 @@ def parseOpts(overrideArguments=None):
                           default=None, type=int)
      selection.add_option('--download-archive', metavar='FILE',
                           dest='download_archive',
-                         help='Download only videos not present in the archive file. Record the IDs of all downloaded videos in it.')
+                         help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
  
  
      authentication.add_option('-u', '--username',
@@ -419,6 +421,8 @@ def parseOpts(overrideArguments=None):
          if opts.verbose:
              write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
      else:
+        systemConf = _readOptions('/etc/youtube-dl.conf')
+
          xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
          if xdg_config_home:
              userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
@@ -428,8 +432,31 @@ def parseOpts(overrideArguments=None):
              userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
              if not os.path.isfile(userConfFile):
                  userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
-        systemConf = _readOptions('/etc/youtube-dl.conf')
-        userConf = _readOptions(userConfFile)
+        userConf = _readOptions(userConfFile, None)
+
+        if userConf is None:
+            appdata_dir = os.environ.get('appdata')
+            if appdata_dir:
+                userConf = _readOptions(
+                    os.path.join(appdata_dir, 'youtube-dl', 'config'),
+                    default=None)
+                if userConf is None:
+                    userConf = _readOptions(
+                        os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
+                        default=None)
+
+        if userConf is None:
+            userConf = _readOptions(
+                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
+                default=None)
+        if userConf is None:
+            userConf = _readOptions(
+                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
+                default=None)
+
+        if userConf is None:
+            userConf = []
+
          commandLineConf = sys.argv[1:]
          argv = systemConf + userConf + commandLineConf
          opts, args = parser.parse_args(argv)
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py

index 5b522552a2c4e6db72ab3be7ab77d207793a576d..a527f10de250596e42f19f0957433e2a72fe5bbf 100644 (file)
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor):
                  })
              formats = sorted(formats, key=lambda f: (f['height'], f['width']))
  
-            info = {
+            playlist.append({
                  '_type': 'video',
                  'id': video_id,
                  'title': title,
@@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor):
                  'upload_date': upload_date,
                  'uploader_id': uploader_id,
                  'user_agent': 'QuickTime compatible (youtube-dl)',
-            }
-            # TODO: Remove when #980 has been merged
-            info['url'] = formats[-1]['url']
-            info['ext'] = formats[-1]['ext']
-
-            playlist.append(info)
+            })
  
          return {
              '_type': 'playlist',
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py

index a8394bfb0d35487d4a0aa038de05805b074b64ca..8bb546410f7a7486bdaa964bc724cf2c501e8851 100644 (file)
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor):
          for f in formats:
              f['ext'] = determine_ext(f['url'])
  
-        info = {
+        return {
              '_type': 'video',
              'id': video_id,
              'title': title,
@@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor):
              'description': description,
              'uploader': uploader,
              'upload_date': upload_date,
+            'thumbnail': data.get('misc', {}).get('image'),
          }
-        thumbnail = data.get('misc', {}).get('image')
-        if thumbnail:
-            info['thumbnail'] = thumbnail
-
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
-        return info
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py

index caea446eab42d7484c62f2b08e50f54e27e8849c..a54ce3ee7c44727a9e56b1ab8359bd099b48bb35 100644 (file)
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -1,7 +1,7 @@
  import re
  
  from .common import InfoExtractor
-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor
  from ..utils import (
      compat_str,
      compat_urllib_parse,
@@ -11,8 +11,8 @@ from ..utils import (
  )
  
  
-class ComedyCentralIE(MTVIE):
-    _VALID_URL = r'https?://(?:www\.)?comedycentral\.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
+class ComedyCentralIE(MTVServicesInfoExtractor):
+    _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
      _FEED_URL = u'http://comedycentral.com/feeds/mrss/'
  
      _TEST = {
@@ -25,12 +25,6 @@ class ComedyCentralIE(MTVIE):
              u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
          },
      }
-    # Overwrite MTVIE properties we don't want
-    _TESTS = []
-
-    def _get_thumbnail_url(self, uri, itemdoc):
-        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
-        return itemdoc.find(search_path).attrib['url']
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -197,7 +191,7 @@ class ComedyCentralShowsIE(InfoExtractor):
                  })
  
              effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
-            info = {
+            results.append({
                  'id': shortMediaId,
                  'formats': formats,
                  'uploader': showId,
@@ -205,11 +199,6 @@ class ComedyCentralShowsIE(InfoExtractor):
                  'title': effTitle,
                  'thumbnail': None,
                  'description': compat_str(officialTitle),
-            }
-
-            # TODO: Remove when #980 has been merged
-            info.update(info['formats'][-1])
-
-            results.append(info)
+            })
  
          return results
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py

index 3d1dcb793627cb2d642c974f689c130faffe9ff1..d418ce4a8a29c122e811c96aac76d388c790b560 100644 (file)
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -28,7 +28,8 @@ class DaumIE(InfoExtractor):
          video_id = mobj.group(1)
          canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
          webpage = self._download_webpage(canonical_url, video_id)
-        full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
+        full_id = self._search_regex(
+            r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
              webpage, u'full id')
          query = compat_urllib_parse.urlencode({'vid': full_id})
          info = self._download_xml(
@@ -56,7 +57,7 @@ class DaumIE(InfoExtractor):
                  'format_id': profile,
              })
  
-        info = {
+        return {
              'id': video_id,
              'title': info.find('TITLE').text,
              'formats': formats,
@@ -65,6 +66,3 @@ class DaumIE(InfoExtractor):
              'duration': int(info.find('DURATION').text),
              'upload_date': info.find('REGDTTM').text[:8],
          }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py

index 008c9969948a2fa5ea95857789806e001eb38999..cb7226f82a6af167569286918a56cce64e796150 100644 (file)
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -65,7 +65,7 @@ class DreiSatIE(InfoExtractor):
              return (qidx, prefer_http, format['video_bitrate'])
          formats.sort(key=_sortkey)
  
-        info = {
+        return {
              '_type': 'video',
              'id': video_id,
              'title': video_title,
@@ -76,8 +76,3 @@ class DreiSatIE(InfoExtractor):
              'uploader': video_uploader,
              'upload_date': upload_date,
          }
-
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
-        return info
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py

index 615674bafbc56173b30be6fa0b81999ffc3a996c..c6ab6952e84dc9074816f28ebb7fe6d8ce02cb47 100644 (file)
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -44,13 +44,10 @@ class FazIE(InfoExtractor):
              })
  
          descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
-        info = {
+        return {
              'id': video_id,
              'title': self._og_search_title(webpage),
              'formats': formats,
              'description': descr,
              'thumbnail': config.find('STILL/STILL_BIG').text,
          }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py

index 9645b00c3307a42ba48b66af599345ba80349a3d..26b7d2ae531f785bc3177af4029652c531d840da 100644 (file)
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor):
                  'format_id': q,
              })
  
-        info = {
+        return {
              'id': data_video['guid'],
              'title': compat_urllib_parse.unquote(data_video['title']),
              'formats': formats,
              'description': get_meta_content('description', webpage),
              'thumbnail': self._og_search_thumbnail(webpage),
          }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py

index 88f65603188883d57f9c506331933bf6d9de16f2..d82a5d4b2a30578298080f03a8bba5f502e48f20 100644 (file)
--- a/youtube_dl/extractor/gametrailers.py
+++ b/youtube_dl/extractor/gametrailers.py
@@ -1,12 +1,9 @@
  import re
  
-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor
  
-class GametrailersIE(MTVIE):
-    """
-    Gametrailers use the same videos system as MTVIE, it just changes the feed
-    url, where the uri is and the method to get the thumbnails.
-    """
+
+class GametrailersIE(MTVServicesInfoExtractor):
      _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
      _TEST = {
          u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
@@ -17,15 +14,9 @@ class GametrailersIE(MTVIE):
              u'description': u'Faith is back!  Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
          },
      }
-    # Overwrite MTVIE properties we don't want
-    _TESTS = []
  
      _FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
  
-    def _get_thumbnail_url(self, uri, itemdoc):
-        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
-        return itemdoc.find(search_path).attrib['url']
-
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py

index 449138b569f80c97154ea79ac874617efc484a3f..6b95b4998852ac61d1061e0dcf6c3f442772fee2 100644 (file)
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dl/extractor/metacritic.py
@@ -43,13 +43,10 @@ class MetacriticIE(InfoExtractor):
          description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
              webpage, u'description', flags=re.DOTALL)
  
-        info = {
+        return {
              'id': video_id,
              'title': clip.find('title').text,
              'formats': formats,
              'description': description,
              'duration': int(clip.find('duration').text),
          }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py

index 42aee58befdbe9e0e73a72dfefddd30a7c7cbd81..6b3feb560768f96c4d5b3bb3adc0989ecf1c1d4f 100644 (file)
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -10,35 +10,8 @@ from ..utils import (
  def _media_xml_tag(tag):
      return '{http://search.yahoo.com/mrss/}%s' % tag
  
-class MTVIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
-
-    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
-
-    _TESTS = [
-        {
-            u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
-            u'file': u'853555.mp4',
-            u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
-            u'info_dict': {
-                u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
-                u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
-            },
-        },
-        {
-            u'add_ie': ['Vevo'],
-            u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
-            u'file': u'USCJY1331283.mp4',
-            u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
-            u'info_dict': {
-                u'title': u'Everything Has Changed',
-                u'upload_date': u'20130606',
-                u'uploader': u'Taylor Swift',
-            },
-            u'skip': u'VEVO is only available in some countries',
-        },
-    ]
  
+class MTVServicesInfoExtractor(InfoExtractor):
      @staticmethod
      def _id_from_uri(uri):
          return uri.split(':')[-1]
@@ -53,7 +26,12 @@ class MTVIE(InfoExtractor):
          return base + m.group('finalid')
  
      def _get_thumbnail_url(self, uri, itemdoc):
-        return 'http://mtv.mtvnimages.com/uri/' + uri
+        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+        thumb_node = itemdoc.find(search_path)
+        if thumb_node is None:
+            return None
+        else:
+            return thumb_node.attrib['url']
  
      def _extract_video_formats(self, metadataXml):
          if '/error_country_block.swf' in metadataXml:
@@ -93,7 +71,7 @@ class MTVIE(InfoExtractor):
          else:
              description = None
  
-        info = {
+        return {
              'title': itemdoc.find('title').text,
              'formats': self._extract_video_formats(mediagen_page),
              'id': video_id,
@@ -101,11 +79,6 @@ class MTVIE(InfoExtractor):
              'description': description,
          }
  
-        # TODO: Remove when #980 has been merged
-        info.update(info['formats'][-1])
-
-        return info
-
      def _get_videos_info(self, uri):
          video_id = self._id_from_uri(uri)
          data = compat_urllib_parse.urlencode({'uri': uri})
@@ -113,6 +86,39 @@ class MTVIE(InfoExtractor):
                                           u'Downloading info')
          return [self._get_video_info(item) for item in idoc.findall('.//item')]
  
+
+class MTVIE(MTVServicesInfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
+
+    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
+
+    _TESTS = [
+        {
+            u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
+            u'file': u'853555.mp4',
+            u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
+            u'info_dict': {
+                u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
+                u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
+            },
+        },
+        {
+            u'add_ie': ['Vevo'],
+            u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
+            u'file': u'USCJY1331283.mp4',
+            u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
+            u'info_dict': {
+                u'title': u'Everything Has Changed',
+                u'upload_date': u'20130606',
+                u'uploader': u'Taylor Swift',
+            },
+            u'skip': u'VEVO is only available in some countries',
+        },
+    ]
+
+    def _get_thumbnail_url(self, uri, itemdoc):
+        return 'http://mtv.mtvnimages.com/uri/' + uri
+
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('videoid')
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py

index d290397c7874736948850038f0f7d0b0fc565128..c012ec0cfacb2afea6b395c5c87509f53ed58614 100644 (file)
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -56,7 +56,7 @@ class NaverIE(InfoExtractor):
                  'height': int(format_el.find('height').text),
              })
  
-        info = {
+        return {
              'id': video_id,
              'title': info.find('Subject').text,
              'formats': formats,
@@ -65,6 +65,3 @@ class NaverIE(InfoExtractor):
              'upload_date': info.find('WriteDate').text.replace('.', ''),
              'view_count': int(info.find('PlayCount').text),
          }
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-        return info
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py

index 3bbda128e1a3881ffd0b7a81e6c45da128994db5..c2254ae8abdca2ab9dde2388fb2182b056ffd0e2 100644 (file)
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -30,7 +30,7 @@ class RedTubeIE(InfoExtractor):
              r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
  
          video_title = self._html_search_regex(
-            r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
+            r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
              webpage, u'title')
  
          # No self-labeling, but they describe themselves as
diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py

index a711531e668bbc3ba32bfa3a93872c5f25ac73ab..fd90cc5dd18f966242d658df1e133456271c8ee3 100644 (file)
--- a/youtube_dl/extractor/southparkstudios.py
+++ b/youtube_dl/extractor/southparkstudios.py
@@ -1,15 +1,14 @@
  import re
  
-from .mtv import MTVIE, _media_xml_tag
+from .mtv import MTVServicesInfoExtractor
  
  
-class SouthParkStudiosIE(MTVIE):
+class SouthParkStudiosIE(MTVServicesInfoExtractor):
      IE_NAME = u'southparkstudios.com'
      _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
  
      _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
  
-    # Overwrite MTVIE properties we don't want
      _TESTS = [{
          u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
          u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
@@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE):
          },
      }]
  
-    def _get_thumbnail_url(self, uri, itemdoc):
-        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
-        thumb_node = itemdoc.find(search_path)
-        if thumb_node is None:
-            return None
-        else:
-            return thumb_node.attrib['url']
-
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          url = u'http://www.' + mobj.group(u'url')
diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py

index 1c49e580d19f65561f539b93d07d15eac4e9f0e4..d64aaa41f690956b08211ed4fe07e1bc27267641 100644 (file)
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@@ -55,7 +55,7 @@ class TriluliluIE(InfoExtractor):
              for fnode in format_doc.findall('./formats/format')
          ]
  
-        info = {
+        return {
              '_type': 'video',
              'id': video_id,
              'formats': formats,
@@ -64,7 +64,3 @@ class TriluliluIE(InfoExtractor):
              'thumbnail': thumbnail,
          }
  
-        # TODO: Remove when #980 has been merged
-        info.update(formats[-1])
-
-        return info
diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py

index 36d1bde08eff877e17958388f919d403cdd7a368..138a35b2a05006255fdceb8b8be90fa9a0f6ee4d 100644 (file)
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dl/extractor/viddler.py
@@ -47,7 +47,7 @@ class ViddlerIE(InfoExtractor):
              r"thumbnail\s*:\s*'([^']*)'",
              webpage, u'thumbnail', fatal=False)
  
-        info = {
+        return {
              '_type': 'video',
              'id': video_id,
              'title': title,
@@ -56,9 +56,3 @@ class ViddlerIE(InfoExtractor):
              'duration': duration,
              'formats': formats,
          }
-
-        # TODO: Remove when #980 has been merged
-        info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
-        info.update(info['formats'][-1])
-
-        return info
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py

index 7444d3393a25f8a49778a5bd589aa839591bd9d8..279f75e7a1f5b860e81d955c33bb58fcea092cbc 100644 (file)
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -26,7 +26,7 @@ class XHamsterIE(InfoExtractor):
      {
          u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
          u'file': u'2221348.flv',
-        u'md5': u'e767b9475de189320f691f49c679c4c7',
+        u'md5': u'970a94178ca4118c5aa3aaea21211b81',
          u'info_dict': {
              u"upload_date": u"20130914",
              u"uploader_id": u"jojo747400",
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 765b4a9bf769926350350e83d680d35c1e4d2775..7fff761bd0b5a7835c5b4a11c3a1d15ac67567d8 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -336,7 +336,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  u"uploader": u"Philipp Hagemeister",
                  u"uploader_id": u"phihag",
                  u"upload_date": u"20121002",
-                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
+                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
              }
          },
          {
@@ -1366,6 +1366,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          # description
          video_description = get_element_by_id("eow-description", video_webpage)
          if video_description:
+            video_description = re.sub(r'''(?x)
+                <a\s+
+                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                    title="([^"]+)"\s+
+                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                    class="yt-uix-redirect-link"\s*>
+                [^<]+
+                </a>
+            ''', r'\1', video_description)
              video_description = clean_html(video_description)
          else:
              fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
@@ -1765,6 +1774,7 @@ class YoutubeSearchIE(SearchInfoExtractor):
          return self.playlist_result(videos, query)
  
  class YoutubeSearchDateIE(YoutubeSearchIE):
+    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
      _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
      _SEARCH_KEY = 'ytsearchdate'
      IE_DESC = u'YouTube.com searches, newest videos first'
diff --git a/youtube_dl/version.py b/youtube_dl/version.py

index d8f341ab980942cb3ac5d6ca67313f4ecbfc79c9..68b30bfd4a4ec455f3dad230e1fc30c353d807ca 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
  
-__version__ = '2013.12.02'
+__version__ = '2013.12.04'
author	Philipp Hagemeister <phihag@phihag.de>
	Wed, 4 Dec 2013 18:56:05 +0000 (19:56 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Wed, 4 Dec 2013 18:56:05 +0000 (19:56 +0100)
README.md		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/extractor/appletrailers.py		patch \| blob \| history
youtube_dl/extractor/archiveorg.py		patch \| blob \| history
youtube_dl/extractor/comedycentral.py		patch \| blob \| history
youtube_dl/extractor/daum.py		patch \| blob \| history
youtube_dl/extractor/dreisat.py		patch \| blob \| history
youtube_dl/extractor/faz.py		patch \| blob \| history
youtube_dl/extractor/gamespot.py		patch \| blob \| history
youtube_dl/extractor/gametrailers.py		patch \| blob \| history
youtube_dl/extractor/metacritic.py		patch \| blob \| history
youtube_dl/extractor/mtv.py		patch \| blob \| history
youtube_dl/extractor/naver.py		patch \| blob \| history
youtube_dl/extractor/redtube.py		patch \| blob \| history
youtube_dl/extractor/southparkstudios.py		patch \| blob \| history
youtube_dl/extractor/trilulilu.py		patch \| blob \| history
youtube_dl/extractor/viddler.py		patch \| blob \| history
youtube_dl/extractor/xhamster.py		patch \| blob \| history
youtube_dl/extractor/youtube.py		patch \| blob \| history
youtube_dl/version.py		patch \| blob \| history