Merge remote-tracking branch 'dstftw/correct-valid-urls'
author: Philipp Hagemeister <phihag@phihag.de>
Wed, 4 Dec 2013 18:56:05 +0000 (19:56 +0100)
committer: Philipp Hagemeister <phihag@phihag.de>
Wed, 4 Dec 2013 18:56:05 +0000 (19:56 +0100)
1  2 
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/archiveorg.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/dreisat.py
youtube_dl/extractor/faz.py
youtube_dl/extractor/gametrailers.py
youtube_dl/extractor/viddler.py

index 4befff3942cd5f17fddb48bfb3b4c7f7623af1d6,5b522552a2c4e6db72ab3be7ab77d207793a576d..a527f10de250596e42f19f0957433e2a72fe5bbf
@@@ -10,7 -10,7 +10,7 @@@ from ..utils import 
  
  
  class AppleTrailersIE(InfoExtractor):
-     _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
      _TEST = {
          u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
          u"playlist": [
                  })
              formats = sorted(formats, key=lambda f: (f['height'], f['width']))
  
 -            info = {
 +            playlist.append({
                  '_type': 'video',
                  'id': video_id,
                  'title': title,
                  'upload_date': upload_date,
                  'uploader_id': uploader_id,
                  'user_agent': 'QuickTime compatible (youtube-dl)',
 -            }
 -            # TODO: Remove when #980 has been merged
 -            info['url'] = formats[-1]['url']
 -            info['ext'] = formats[-1]['ext']
 -
 -            playlist.append(info)
 +            })
  
          return {
              '_type': 'playlist',
index 3ae0aebb1275f0a4b1bed0c1dda3d969c0672a87,a8394bfb0d35487d4a0aa038de05805b074b64ca..8bb546410f7a7486bdaa964bc724cf2c501e8851
@@@ -11,7 -11,7 +11,7 @@@ from ..utils import 
  class ArchiveOrgIE(InfoExtractor):
      IE_NAME = 'archive.org'
      IE_DESC = 'archive.org videos'
-     _VALID_URL = r'(?:https?://)?(?:www\.)?archive.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
+     _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
      _TEST = {
          u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
          u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
@@@ -49,7 -49,7 +49,7 @@@
          for f in formats:
              f['ext'] = determine_ext(f['url'])
  
 -        info = {
 +        return {
              '_type': 'video',
              'id': video_id,
              'title': title,
              'description': description,
              'uploader': uploader,
              'upload_date': upload_date,
 +            'thumbnail': data.get('misc', {}).get('image'),
          }
 -        thumbnail = data.get('misc', {}).get('image')
 -        if thumbnail:
 -            info['thumbnail'] = thumbnail
 -
 -        # TODO: Remove when #980 has been merged
 -        info.update(formats[-1])
 -
 -        return info
index 53579aa2703e78150c14dfe5dfd35e6240310952,caea446eab42d7484c62f2b08e50f54e27e8849c..a54ce3ee7c44727a9e56b1ab8359bd099b48bb35
@@@ -1,7 -1,7 +1,7 @@@
  import re
  
  from .common import InfoExtractor
 -from .mtv import MTVIE, _media_xml_tag
 +from .mtv import MTVServicesInfoExtractor
  from ..utils import (
      compat_str,
      compat_urllib_parse,
@@@ -11,8 -11,8 +11,8 @@@
  )
  
  
 -class ComedyCentralIE(MTVIE):
 -    _VALID_URL = r'https?://(?:www\.)?comedycentral\.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
 +class ComedyCentralIE(MTVServicesInfoExtractor):
-     _VALID_URL = r'http://www.comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
++    _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
      _FEED_URL = u'http://comedycentral.com/feeds/mrss/'
  
      _TEST = {
              u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
          },
      }
 -    # Overwrite MTVIE properties we don't want
 -    _TESTS = []
 -
 -    def _get_thumbnail_url(self, uri, itemdoc):
 -        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
 -        return itemdoc.find(search_path).attrib['url']
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@@ -191,7 -197,7 +191,7 @@@ class ComedyCentralShowsIE(InfoExtracto
                  })
  
              effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
 -            info = {
 +            results.append({
                  'id': shortMediaId,
                  'formats': formats,
                  'uploader': showId,
                  'title': effTitle,
                  'thumbnail': None,
                  'description': compat_str(officialTitle),
 -            }
 -
 -            # TODO: Remove when #980 has been merged
 -            info.update(info['formats'][-1])
 -
 -            results.append(info)
 +            })
  
          return results
index 24ce794255211112eafadaf2b5a629716b90aa5e,008c9969948a2fa5ea95857789806e001eb38999..cb7226f82a6af167569286918a56cce64e796150
@@@ -11,7 -11,7 +11,7 @@@ from ..utils import 
  
  class DreiSatIE(InfoExtractor):
      IE_NAME = '3sat'
-     _VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
+     _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
      _TEST = {
          u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
          u'file': u'36983.webm',
@@@ -65,7 -65,7 +65,7 @@@
              return (qidx, prefer_http, format['video_bitrate'])
          formats.sort(key=_sortkey)
  
 -        info = {
 +        return {
              '_type': 'video',
              'id': video_id,
              'title': video_title,
@@@ -76,3 -76,8 +76,3 @@@
              'uploader': video_uploader,
              'upload_date': upload_date,
          }
 -
 -        # TODO: Remove when #980 has been merged
 -        info.update(formats[-1])
 -
 -        return info
index d0dfde694b4d93f7249f2dd3a326ecb0bdca98dd,615674bafbc56173b30be6fa0b81999ffc3a996c..c6ab6952e84dc9074816f28ebb7fe6d8ce02cb47
@@@ -9,7 -9,7 +9,7 @@@ from ..utils import 
  
  class FazIE(InfoExtractor):
      IE_NAME = u'faz.net'
-     _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html'
+     _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
  
      _TEST = {
          u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
              })
  
          descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
 -        info = {
 +        return {
              'id': video_id,
              'title': self._og_search_title(webpage),
              'formats': formats,
              'description': descr,
              'thumbnail': config.find('STILL/STILL_BIG').text,
          }
 -        # TODO: Remove when #980 has been merged
 -        info.update(formats[-1])
 -        return info
index 3a8bef250fa8eddd89af54291228ec3909c1453c,88f65603188883d57f9c506331933bf6d9de16f2..d82a5d4b2a30578298080f03a8bba5f502e48f20
@@@ -1,11 -1,13 +1,10 @@@
  import re
  
 -from .mtv import MTVIE, _media_xml_tag
 +from .mtv import MTVServicesInfoExtractor
  
 -class GametrailersIE(MTVIE):
 -    """
 -    Gametrailers use the same videos system as MTVIE, it just changes the feed
 -    url, where the uri is and the method to get the thumbnails.
 -    """
 +
 +class GametrailersIE(MTVServicesInfoExtractor):
-     _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
+     _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
      _TEST = {
          u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
          u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
              u'description': u'Faith is back!  Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
          },
      }
 -    # Overwrite MTVIE properties we don't want
 -    _TESTS = []
  
      _FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
  
 -    def _get_thumbnail_url(self, uri, itemdoc):
 -        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
 -        return itemdoc.find(search_path).attrib['url']
 -
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
index 75335dfb8797e83c7413f0c8bee86603ed429847,36d1bde08eff877e17958388f919d403cdd7a368..138a35b2a05006255fdceb8b8be90fa9a0f6ee4d
@@@ -8,7 -8,7 +8,7 @@@ from ..utils import 
  
  
  class ViddlerIE(InfoExtractor):
-     _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
+     _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
      _TEST = {
          u"url": u"http://www.viddler.com/v/43903784",
          u'file': u'43903784.mp4',
@@@ -47,7 -47,7 +47,7 @@@
              r"thumbnail\s*:\s*'([^']*)'",
              webpage, u'thumbnail', fatal=False)
  
 -        info = {
 +        return {
              '_type': 'video',
              'id': video_id,
              'title': title,
@@@ -56,3 -56,9 +56,3 @@@
              'duration': duration,
              'formats': formats,
          }
 -
 -        # TODO: Remove when #980 has been merged
 -        info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
 -        info.update(info['formats'][-1])
 -
 -        return info