Merge remote-tracking branch 'jaimeMF/opus-fix'

author Philipp Hagemeister <phihag@phihag.de>

Sun, 13 Oct 2013 13:26:10 +0000 (15:26 +0200)

committer Philipp Hagemeister <phihag@phihag.de>

Sun, 13 Oct 2013 13:26:10 +0000 (15:26 +0200)
author Philipp Hagemeister <phihag@phihag.de>
Sun, 13 Oct 2013 13:26:10 +0000 (15:26 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 13 Oct 2013 13:26:10 +0000 (15:26 +0200)
diff --git a/test/__init__.py b/test/__init__.py

new file mode 100644 (file)

index 0000000..e69de29
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py

index 943f9a315f804b6c6b7f8f31c9688ef1e5ee6d41..ec3e30572fa6c15e7a3c24839e40f409428f41b2 100644 (file)
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -8,7 +8,7 @@ import os
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
  from youtube_dl import YoutubeDL
-from helper import try_rm
+from .helper import try_rm
  
  
  def _download_restricted(url, filename, age):
diff --git a/test/test_all_urls.py b/test/test_all_urls.py

index ff1c86efebe31f2d6477cbfe6246baa50607d6a7..b28ad000bc8d7a1f172ee12bd218e0b96ad08c2b 100644 (file)
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -8,7 +8,7 @@ import os
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
  from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors
-from helper import get_testcases
+from .helper import get_testcases
  
  class TestAllURLsMatching(unittest.TestCase):
      def setUp(self):
diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py

index ed2ad311df0035010be978e3515e88e18f7ba11a..e655d280deb0640e32f71a3d76abac16e7d7f68e 100644 (file)
--- a/test/test_dailymotion_subtitles.py
+++ b/test/test_dailymotion_subtitles.py
@@ -10,7 +10,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
  from youtube_dl.extractor import DailymotionIE
  from youtube_dl.utils import *
-from helper import FakeYDL
+from .helper import FakeYDL
  
  md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
  
diff --git a/test/test_download.py b/test/test_download.py

index fdf59bb5c6af88bc0ee8dcbcbb4bc72b383b7968..68da4d98450e12a3bae790e43e62f5d8dc9b7909 100644 (file)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -37,8 +37,8 @@ def _file_md5(fn):
      with open(fn, 'rb') as f:
          return hashlib.md5(f.read()).hexdigest()
  
-import helper  # Set up remaining global configuration
-from helper import get_testcases, try_rm
+import test.helper as helper  # Set up remaining global configuration
+from .helper import get_testcases, try_rm
  defs = get_testcases()
  
  with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
diff --git a/test/test_playlists.py b/test/test_playlists.py

index de8bd298a3e6c5c54518869b2bac2c166c5c3226..108a4d63bc60e8bc5a20335798ae43ed510eea1f 100644 (file)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -20,7 +20,7 @@ from youtube_dl.extractor import (
  )
  from youtube_dl.utils import *
  
-from helper import FakeYDL
+from .helper import FakeYDL
  
  class TestPlaylists(unittest.TestCase):
      def assertIsPlaylist(self, info):
diff --git a/test/test_utils.py b/test/test_utils.py

index ff2e9885bdba233699edd69457aed91de35c8708..f2c03d42149b3cacac4af11ea16df6cd45c6e4d0 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -20,6 +20,7 @@ from youtube_dl.utils import (
      unified_strdate,
      find_xpath_attr,
      get_meta_content,
+    xpath_with_ns,
  )
  
  if sys.version_info < (3, 0):
@@ -141,5 +142,18 @@ class TestUtil(unittest.TestCase):
          self.assertEqual(get_meta('description'), u'foo & bar')
          self.assertEqual(get_meta('author'), 'Plato')
  
+    def test_xpath_with_ns(self):
+        testxml = u'''<root xmlns:media="http://example.com/">
+            <media:song>
+                <media:author>The Author</media:author>
+                <url>http://server.com/download.mp3</url>
+            </media:song>
+        </root>'''
+        doc = xml.etree.ElementTree.fromstring(testxml)
+        find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
+        self.assertTrue(find('media:song') is not None)
+        self.assertEqual(find('media:song/media:author').text, u'The Author')
+        self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3')
+
  if __name__ == '__main__':
      unittest.main()
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py

index 53e65816dfb9268c40831a57c781a169fbb050b7..0b5c790301b6c7c1a87d43c5ae7789de545e8efa 100644 (file)
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -11,7 +11,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE
  from youtube_dl.utils import *
  
-from helper import FakeYDL
+from .helper import FakeYDL
  
  class TestYoutubeLists(unittest.TestCase):
      def assertIsPlaylist(self,info):
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py

index f9b0c1ad0b532191a6d4f1e97be5d86ad20d37fd..07850385e0d33bc17f5de3b68698374c838bf6fc 100644 (file)
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -10,7 +10,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
  from youtube_dl.extractor import YoutubeIE
  from youtube_dl.utils import *
-from helper import FakeYDL
+from .helper import FakeYDL
  
  md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
  
diff --git a/tox.ini b/tox.ini

index 53b461fdbb5de0b0e12d574e76d9e1215c4d83ee..ed01e3386d8efcaff7bb846ac1f83c3d62763fb0 100644 (file)
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,8 @@
  [tox]
  envlist = py26,py27,py33
  [testenv]
-deps = nose
-commands = nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose test
+deps =
+   nose
+   coverage
+commands = nosetests --verbose {posargs:test}  # --with-coverage --cover-package=youtube_dl --cover-html
+                                               # test.test_download:TestDownload.test_NowVideo
diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py

index 07b6895c0abb5c424edf6345cefae1c08d97f088..039e014982e2396ad3175a4c3fcd3dc15030952e 100644 (file)
--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@@ -83,6 +83,8 @@ class FFmpegPostProcessor(PostProcessor):
                 + opts +
                 [encodeFilename(self._ffmpeg_filename_argument(out_path))])
  
+        if self._downloader.params.get('verbose', False):
+            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
          p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
          stdout,stderr = p.communicate()
          if p.returncode != 0:
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 226c3a762e802b4f98bb5ba33d4ed2a74d5f7b32..d76945a48cda7206aa723015bd8dbbf3cf9a216e 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -2,7 +2,12 @@ from .appletrailers import AppleTrailersIE
  from .addanime import AddAnimeIE
  from .archiveorg import ArchiveOrgIE
  from .ard import ARDIE
-from .arte import ArteTvIE
+from .arte import (
+    ArteTvIE,
+    ArteTVPlus7IE,
+    ArteTVCreativeIE,
+    ArteTVFutureIE,
+)
  from .auengine import AUEngineIE
  from .bandcamp import BandcampIE
  from .bliptv import BlipTVIE, BlipTVUserIE
@@ -62,6 +67,7 @@ from .ign import IGNIE, OneUPIE
  from .ina import InaIE
  from .infoq import InfoQIE
  from .instagram import InstagramIE
+from .internetvideoarchive import InternetVideoArchiveIE
  from .jeuxvideo import JeuxVideoIE
  from .jukebox import JukeboxIE
  from .justintv import JustinTVIE
@@ -83,6 +89,7 @@ from .nba import NBAIE
  from .nbc import NBCNewsIE
  from .newgrounds import NewgroundsIE
  from .nhl import NHLIE, NHLVideocenterIE
+from .nowvideo import NowVideoIE
  from .ooyala import OoyalaIE
  from .orf import ORFIE
  from .pbs import PBSIE
@@ -92,6 +99,7 @@ from .rbmaradio import RBMARadioIE
  from .redtube import RedTubeIE
  from .ringtv import RingTVIE
  from .ro220 import Ro220IE
+from .rottentomatoes import RottenTomatoesIE
  from .roxwel import RoxwelIE
  from .rtlnow import RTLnowIE
  from .sina import SinaIE
@@ -121,6 +129,7 @@ from .veoh import VeohIE
  from .vevo import VevoIE
  from .vice import ViceIE
  from .viddler import ViddlerIE
+from .videodetective import VideoDetectiveIE
  from .videofyme import VideofyMeIE
  from .vimeo import VimeoIE, VimeoChannelIE
  from .vine import VineIE
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py

index 4707d7ccab51502dadf787ab2a2fb1558a1c9d45..5ee8a67b14699a330914cd4f0e0f627ca9fca5a5 100644 (file)
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
  import re
  import json
  import xml.etree.ElementTree
@@ -7,15 +8,15 @@ from ..utils import (
      ExtractorError,
      find_xpath_attr,
      unified_strdate,
+    determine_ext,
+    get_element_by_id,
  )
  
+# There are different sources of video in arte.tv, the extraction process 
+# is different for each one. The videos usually expire in 7 days, so we can't
+# add tests.
+
  class ArteTvIE(InfoExtractor):
-    """
-    There are two sources of video in arte.tv: videos.arte.tv and
-    www.arte.tv/guide, the extraction process is different for each one.
-    The videos expire in 7 days, so we can't add tests.
-    """
-    _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
      _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
      _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
      _LIVE_URL = r'index-[0-9]+\.html$'
@@ -24,7 +25,7 @@ class ArteTvIE(InfoExtractor):
  
      @classmethod
      def suitable(cls, url):
-        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
+        return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL))
  
      # TODO implement Live Stream
      # from ..utils import compat_urllib_parse
@@ -55,14 +56,6 @@ class ArteTvIE(InfoExtractor):
      #     video_url = u'%s/%s' % (info.get('url'), info.get('path'))
  
      def _real_extract(self, url):
-        mobj = re.match(self._EMISSION_URL, url)
-        if mobj is not None:
-            lang = mobj.group('lang')
-            # This is not a real id, it can be for example AJT for the news
-            # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
-            video_id = mobj.group('id')
-            return self._extract_emission(url, video_id, lang)
-
          mobj = re.match(self._VIDEOS_URL, url)
          if mobj is not None:
              id = mobj.group('id')
@@ -80,59 +73,6 @@ class ArteTvIE(InfoExtractor):
              # self.extractLiveStream(url)
              # return
  
-    def _extract_emission(self, url, video_id, lang):
-        """Extract from www.arte.tv/guide"""
-        webpage = self._download_webpage(url, video_id)
-        json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
-
-        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
-        self.report_extraction(video_id)
-        info = json.loads(json_info)
-        player_info = info['videoJsonPlayer']
-
-        info_dict = {'id': player_info['VID'],
-                     'title': player_info['VTI'],
-                     'description': player_info.get('VDE'),
-                     'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
-                     'thumbnail': player_info['programImage'],
-                     'ext': 'flv',
-                     }
-
-        formats = player_info['VSR'].values()
-        def _match_lang(f):
-            # Return true if that format is in the language of the url
-            if lang == 'fr':
-                l = 'F'
-            elif lang == 'de':
-                l = 'A'
-            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
-            return any(re.match(r, f['versionCode']) for r in regexes)
-        # Some formats may not be in the same language as the url
-        formats = filter(_match_lang, formats)
-        # Some formats use the m3u8 protocol
-        formats = filter(lambda f: f['videoFormat'] != 'M3U8', formats)
-        # We order the formats by quality
-        formats = sorted(formats, key=lambda f: int(f['height']))
-        # Prefer videos without subtitles in the same language
-        formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
-        # Pick the best quality
-        def _format(format_info):
-            info = {'ext': 'flv',
-                    'width': format_info.get('width'),
-                    'height': format_info.get('height'),
-                    }
-            if format_info['mediaType'] == u'rtmp':
-                info['url'] = format_info['streamer']
-                info['play_path'] = 'mp4:' + format_info['url']
-            else:
-                info_dict['url'] = format_info['url']
-            return info
-        info_dict['formats'] = [_format(f) for f in formats]
-        # TODO: Remove when #980 has been merged 
-        info_dict.update(info_dict['formats'][-1])
-
-        return info_dict
-
      def _extract_video(self, url, video_id, lang):
          """Extract from videos.arte.tv"""
          ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
@@ -182,3 +122,110 @@ class ArteTvIE(InfoExtractor):
                  'ext': 'flv',
                  'thumbnail': self._og_search_thumbnail(webpage),
                  }
+
+
+class ArteTVPlus7IE(InfoExtractor):
+    IE_NAME = u'arte.tv:+7'
+    _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+
+    @classmethod
+    def _extract_url_info(cls, url):
+        mobj = re.match(cls._VALID_URL, url)
+        lang = mobj.group('lang')
+        # This is not a real id, it can be for example AJT for the news
+        # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
+        video_id = mobj.group('id')
+        return video_id, lang
+
+    def _real_extract(self, url):
+        video_id, lang = self._extract_url_info(url)
+        webpage = self._download_webpage(url, video_id)
+        return self._extract_from_webpage(webpage, video_id, lang)
+
+    def _extract_from_webpage(self, webpage, video_id, lang):
+        json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
+
+        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
+        self.report_extraction(video_id)
+        info = json.loads(json_info)
+        player_info = info['videoJsonPlayer']
+
+        info_dict = {
+            'id': player_info['VID'],
+            'title': player_info['VTI'],
+            'description': player_info.get('VDE'),
+            'upload_date': unified_strdate(player_info.get('VDA', '').split(' ')[0]),
+            'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
+        }
+
+        formats = player_info['VSR'].values()
+        def _match_lang(f):
+            if f.get('versionCode') is None:
+                return True
+            # Return true if that format is in the language of the url
+            if lang == 'fr':
+                l = 'F'
+            elif lang == 'de':
+                l = 'A'
+            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
+            return any(re.match(r, f['versionCode']) for r in regexes)
+        # Some formats may not be in the same language as the url
+        formats = filter(_match_lang, formats)
+        # Some formats use the m3u8 protocol
+        formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
+        # We order the formats by quality
+        formats = sorted(formats, key=lambda f: int(f.get('height',-1)))
+        # Prefer videos without subtitles in the same language
+        formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
+        # Pick the best quality
+        def _format(format_info):
+            info = {
+                'width': format_info.get('width'),
+                'height': format_info.get('height'),
+            }
+            if format_info['mediaType'] == u'rtmp':
+                info['url'] = format_info['streamer']
+                info['play_path'] = 'mp4:' + format_info['url']
+                info['ext'] = 'flv'
+            else:
+                info['url'] = format_info['url']
+                info['ext'] = determine_ext(info['url'])
+            return info
+        info_dict['formats'] = [_format(f) for f in formats]
+        # TODO: Remove when #980 has been merged 
+        info_dict.update(info_dict['formats'][-1])
+
+        return info_dict
+
+
+# It also uses the arte_vp_url url from the webpage to extract the information
+class ArteTVCreativeIE(ArteTVPlus7IE):
+    IE_NAME = u'arte.tv:creative'
+    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
+
+    _TEST = {
+        u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
+        u'file': u'050489-002.mp4',
+        u'info_dict': {
+            u'title': u'Agentur Amateur #2 - Corporate Design',
+        },
+    }
+
+
+class ArteTVFutureIE(ArteTVPlus7IE):
+    IE_NAME = u'arte.tv:future'
+    _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
+        u'file': u'050940-003.mp4',
+        u'info_dict': {
+            u'title': u'Les champignons au secours de la planète',
+        },
+    }
+
+    def _real_extract(self, url):
+        anchor_id, lang = self._extract_url_info(url)
+        webpage = self._download_webpage(url, anchor_id)
+        row = get_element_by_id(anchor_id, webpage)
+        return self._extract_from_webpage(row, anchor_id, lang)
diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py

new file mode 100644 (file)

index 0000000..5986459
--- /dev/null
+++ b/youtube_dl/extractor/internetvideoarchive.py
@@ -0,0 +1,87 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    compat_urllib_parse,
+    xpath_with_ns,
+    determine_ext,
+)
+
+
+class InternetVideoArchiveIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
+
+    _TEST = {
+        u'url': u'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
+        u'file': u'452693.mp4',
+        u'info_dict': {
+            u'title': u'SKYFALL',
+            u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
+            u'duration': 156,
+        },
+    }
+
+    @staticmethod
+    def _build_url(query):
+        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
+
+    @staticmethod
+    def _clean_query(query):
+        NEEDED_ARGS = ['publishedid', 'customerid']
+        query_dic = compat_urlparse.parse_qs(query)
+        cleaned_dic = dict((k,v[0]) for (k,v) in query_dic.items() if k in NEEDED_ARGS)
+        # Other player ids return m3u8 urls
+        cleaned_dic['playerid'] = '247'
+        cleaned_dic['videokbrate'] = '100000'
+        return compat_urllib_parse.urlencode(cleaned_dic)
+
+    def _real_extract(self, url):
+        query = compat_urlparse.urlparse(url).query
+        query_dic = compat_urlparse.parse_qs(query)
+        video_id = query_dic['publishedid'][0]
+        url = self._build_url(query)
+
+        flashconfiguration_xml = self._download_webpage(url, video_id,
+            u'Downloading flash configuration')
+        flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
+        file_url = flashconfiguration.find('file').text
+        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
+        # Replace some of the parameters in the query to get the best quality
+        # and http links (no m3u8 manifests)
+        file_url = re.sub(r'(?<=\?)(.+)$',
+            lambda m: self._clean_query(m.group()),
+            file_url)
+        info_xml = self._download_webpage(file_url, video_id,
+            u'Downloading video info')
+        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+        item = info.find('channel/item')
+
+        def _bp(p):
+            return xpath_with_ns(p,
+                {'media': 'http://search.yahoo.com/mrss/',
+                'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})
+        formats = []
+        for content in item.findall(_bp('media:group/media:content')):
+            attr = content.attrib
+            f_url = attr['url']
+            formats.append({
+                'url': f_url,
+                'ext': determine_ext(f_url),
+                'width': int(attr['width']),
+                'bitrate': int(attr['bitrate']),
+            })
+        formats = sorted(formats, key=lambda f: f['bitrate'])
+
+        info = {
+            'id': video_id,
+            'title': item.find('title').text,
+            'formats': formats,
+            'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'],
+            'description': item.find('description').text,
+            'duration': int(attr['duration']),
+        }
+        # TODO: Remove when #980 has been merged
+        info.update(formats[-1])
+        return info
diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py

new file mode 100644 (file)

index 0000000..ab52ad4
--- /dev/null
+++ b/youtube_dl/extractor/nowvideo.py
@@ -0,0 +1,43 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import compat_urlparse
+
+
+class NowVideoIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.ch/video/(?P<id>\w+)'
+    _TEST = {
+        u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
+        u'file': u'0mw0yow7b6dxa.flv',
+        u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817',
+        u'info_dict': {
+            u"title": u"youtubedl test video _BaW_jenozKc.mp4"
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        video_id = mobj.group('id')
+        webpage_url = 'http://www.nowvideo.ch/video/' + video_id
+        webpage = self._download_webpage(webpage_url, video_id)
+
+        self.report_extraction(video_id)
+
+        video_title = self._html_search_regex(r'<h4>(.*)</h4>',
+            webpage, u'video title')
+
+        video_key = self._search_regex(r'var fkzd="(.*)";',
+            webpage, u'video key')
+
+        api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
+        api_response = self._download_webpage(api_call, video_id,
+            u'Downloading API page')
+        video_url = compat_urlparse.parse_qs(api_response)[u'url'][0]
+
+        return [{
+            'id':        video_id,
+            'url':       video_url,
+            'ext':       'flv',
+            'title':     video_title,
+        }]
diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py

new file mode 100644 (file)

index 0000000..c79c394
--- /dev/null
+++ b/youtube_dl/extractor/rottentomatoes.py
@@ -0,0 +1,16 @@
+from .videodetective import VideoDetectiveIE
+
+
+# It just uses the same method as videodetective.com,
+# the internetvideoarchive.com is extracted from the og:video property
+class RottenTomatoesIE(VideoDetectiveIE):
+    _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
+        u'file': '613340.mp4',
+        u'info_dict': {
+            u'title': u'TOY STORY 3',
+            u'description': u'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
+        },
+    }
diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py

new file mode 100644 (file)

index 0000000..d89f840
--- /dev/null
+++ b/youtube_dl/extractor/videodetective.py
@@ -0,0 +1,30 @@
+import re
+
+from .common import InfoExtractor
+from .internetvideoarchive import InternetVideoArchiveIE
+from ..utils import (
+    compat_urlparse,
+)
+
+
+class VideoDetectiveIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487',
+        u'file': u'194487.mp4',
+        u'info_dict': {
+            u'title': u'KICK-ASS 2',
+            u'description': u'md5:65ba37ad619165afac7d432eaded6013',
+            u'duration': 138,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        og_video = self._og_search_video_url(webpage)
+        query = compat_urlparse.urlparse(og_video).query
+        return self.url_result(InternetVideoArchiveIE._build_url(query),
+            ie=InternetVideoArchiveIE.ie_key())
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 82a1daeb9075a056aa908e0a91bbd83b8897673a..3e81c308b27fb9618dba89a30fd5964651f48cbf 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -9,6 +9,7 @@ import io
  import json
  import locale
  import os
+import pipes
  import platform
  import re
  import socket
@@ -229,6 +230,19 @@ else:
                  return f
          return None
  
+# On python2.6 the xml.etree.ElementTree.Element methods don't support
+# the namespace parameter
+def xpath_with_ns(path, ns_map):
+    components = [c.split(':') for c in path.split('/')]
+    replaced = []
+    for c in components:
+        if len(c) == 1:
+            replaced.append(c[0])
+        else:
+            ns, tag = c
+            replaced.append('{%s}%s' % (ns_map[ns], tag))
+    return '/'.join(replaced)
+
  def htmlentity_transform(matchobj):
      """Transforms an HTML entity to a character.
  
@@ -927,3 +941,7 @@ class locked_file(object):
  
      def read(self, *args):
          return self.f.read(*args)
+
+
+def shell_quote(args):
+    return ' '.join(map(pipes.quote, args))
author	Philipp Hagemeister <phihag@phihag.de>
	Sun, 13 Oct 2013 13:26:10 +0000 (15:26 +0200)
committer	Philipp Hagemeister <phihag@phihag.de>
	Sun, 13 Oct 2013 13:26:10 +0000 (15:26 +0200)
test/__init__.py	[new file with mode: 0644]	patch | blob
test/test_age_restriction.py		patch | blob | history
test/test_all_urls.py		patch | blob | history
test/test_dailymotion_subtitles.py		patch | blob | history
test/test_download.py		patch | blob | history
test/test_playlists.py		patch | blob | history
test/test_utils.py		patch | blob | history
test/test_youtube_lists.py		patch | blob | history
test/test_youtube_subtitles.py		patch | blob | history
tox.ini		patch | blob | history
youtube_dl/PostProcessor.py		patch | blob | history
youtube_dl/extractor/__init__.py		patch | blob | history
youtube_dl/extractor/arte.py		patch | blob | history
youtube_dl/extractor/internetvideoarchive.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/nowvideo.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/rottentomatoes.py	[new file with mode: 0644]	patch | blob
youtube_dl/extractor/videodetective.py	[new file with mode: 0644]	patch | blob
youtube_dl/utils.py		patch | blob | history