Merge remote-tracking branch 'hojel/slutload'
author Philipp Hagemeister <phihag@phihag.de>
Tue, 13 May 2014 08:00:49 +0000 (10:00 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Tue, 13 May 2014 08:00:49 +0000 (10:00 +0200)
12 files changed:
.travis.yml
test/helper.py
test/test_playlists.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/common.py
youtube_dl/extractor/fc2.py [new file with mode: 0644]
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/videott.py [new file with mode: 0644]
youtube_dl/extractor/vine.py
youtube_dl/extractor/wdr.py
youtube_dl/version.py

index 45b71f11b62ee31d2547215c1c3999e6bf97129d..c6cc7a994be31732c563612d3560ba1cdf6379da 100644 (file)
@@ -3,6 +3,7 @@ python:
   - "2.6"
   - "2.7"
   - "3.3"
+  - "3.4"
 script: nosetests test --verbose
 notifications:
   email:
index d5e0a603e38285109f4892587ccfb9edffcb901d..230d2bd67ab06b4db552bff30c5620f83673ca93 100644 (file)
@@ -107,7 +107,7 @@ def expect_info_dict(self, expected_dict, got_dict):
         elif isinstance(expected, type):
             got = got_dict.get(info_field)
             self.assertTrue(isinstance(got, expected),
-                u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
+                u'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
         else:
             if isinstance(expected, compat_str) and expected.startswith('md5:'):
                 got = 'md5:' + md5(got_dict.get(info_field))
index fe6274e6038c5dba36e29cda37f49b17fe3d006b..cc871698a7123b8a06986c5d78d3a95a32af1885 100644 (file)
@@ -23,6 +23,7 @@ from youtube_dl.extractor import (
     VimeoUserIE,
     VimeoAlbumIE,
     VimeoGroupsIE,
+    VineUserIE,
     UstreamChannelIE,
     SoundcloudSetIE,
     SoundcloudUserIE,
@@ -102,6 +103,13 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], 'Rolex Awards for Enterprise')
         self.assertTrue(len(result['entries']) > 72)
 
+    def test_vine_user(self):
+        # Extract the 'Visa' profile and expect a playlist with 50+ entries.
+        extractor = VineUserIE(FakeYDL())
+        playlist = extractor.extract('https://vine.co/Visa')
+        self.assertIsPlaylist(playlist)
+        entry_count = len(playlist['entries'])
+        self.assertTrue(entry_count >= 50)
+
     def test_ustream_channel(self):
         dl = FakeYDL()
         ie = UstreamChannelIE(dl)
index 7ed8d197039c0b99990c161907905b2b8a859cac..4e657e297d1a5ddb2aac4a448a8a94a0ac4cf180 100644 (file)
@@ -54,6 +54,8 @@ __authors__  = (
     'phaer',
     'Sainyam Kapoor',
     'Nicolas Évrard',
+    'Jason Normore',
+    'Hoje Lee',
 )
 
 __license__ = 'Public Domain'
index 697bcab87dfe0a53755ddb7c3fc214ad378636e5..5095f14b620839170fbbe264b9810f1dc543ba93 100644 (file)
@@ -79,6 +79,7 @@ from .exfm import ExfmIE
 from .extremetube import ExtremeTubeIE
 from .facebook import FacebookIE
 from .faz import FazIE
+from .fc2 import FC2IE
 from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
 from .fivemin import FiveMinIE
@@ -296,6 +297,7 @@ from .videodetective import VideoDetectiveIE
 from .videolecturesnet import VideoLecturesNetIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
+from .videott import VideoTtIE
 from .videoweed import VideoWeedIE
 from .vimeo import (
     VimeoIE,
@@ -306,7 +308,10 @@ from .vimeo import (
     VimeoReviewIE,
     VimeoWatchLaterIE,
 )
-from .vine import VineIE
+from .vine import (
+    VineIE,
+    VineUserIE,
+)
 from .viki import VikiIE
 from .vk import VKIE
 from .vube import VubeIE
@@ -315,6 +320,7 @@ from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
 from .wdr import (
     WDRIE,
+    WDRMobileIE,
     WDRMausIE,
 )
 from .weibo import WeiboIE
index 10b0cbe695a68b6a3c3928a6c89125020a2fff3d..11b31db88422229b37c85a96ed1df3746867bf5a 100644 (file)
@@ -242,10 +242,11 @@ class InfoExtractor(object):
                 url = url_or_request.get_full_url()
             except AttributeError:
                 url = url_or_request
-            if len(url) > 200:
-                h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest()
-                url = url[:200 - len(h)] + h
-            raw_filename = ('%s_%s.dump' % (video_id, url))
+            basen = video_id + '_' + url
+            if len(basen) > 240:
+                h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+                basen = basen[:240 - len(h)] + h
+            raw_filename = basen + '.dump'
             filename = sanitize_filename(raw_filename, restricted=True)
             self.to_screen(u'Saving request to ' + filename)
             with open(filename, 'wb') as outf:
@@ -555,6 +556,16 @@ class InfoExtractor(object):
             if self._downloader.params.get('prefer_insecure', False)
             else 'https:')
 
+    def _proto_relative_url(self, url, scheme=None):
+        """Prefix a protocol-relative URL ('//host/...') with a scheme.
+
+        None and URLs that do not start with '//' are returned unchanged.
+        When scheme is None, self.http_scheme() supplies the prefix.
+        """
+        if url is None or not url.startswith('//'):
+            return url
+        prefix = self.http_scheme() if scheme is None else scheme
+        return prefix + url
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py
new file mode 100644 (file)
index 0000000..ca89932
--- /dev/null
@@ -0,0 +1,60 @@
+#! -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+import hashlib
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_request,
+    compat_urlparse,
+)
+
+
+class FC2IE(InfoExtractor):
+    """Extractor for single videos on video.fc2.com."""
+    _VALID_URL = r'^http://video\.fc2\.com/(?P<lang>[^/]+)/content/(?P<id>[^/]+)'
+    IE_NAME = 'fc2'
+    _TEST = {
+        'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
+        'md5': 'a6ebe8ebe0396518689d963774a54eb7',
+        'info_dict': {
+            'id': '20121103kUan1KHs',
+            'ext': 'flv',
+            'title': 'Boxing again with Puff',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the video at url.
+
+        Downloads the content page, then queries ginfo.php for the file
+        path and 'mid' value that together form the media URL.
+
+        Raises ExtractorError when the ginfo.php response has 'err_code'.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        self._downloader.cookiejar.clear_session_cookies()  # must clear
+
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        refer = url.replace('/content/', '/a/content/')
+
+        # hashlib only accepts bytes on Python 3, and unicode_literals makes
+        # the salt a text string, so encode before hashing.
+        mimi = hashlib.md5(
+            (video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
+
+        # quote() never escapes '.', so it is percent-encoded by hand.
+        info_url = (
+            "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
+            format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.','%2E')))
+
+        info_webpage = self._download_webpage(
+            info_url, video_id, note='Downloading info page')
+        # The response is a query string; parse_qs maps each key to a list.
+        info = compat_urlparse.parse_qs(info_webpage)
+
+        if 'err_code' in info:
+            raise ExtractorError('Error code: %s' % info['err_code'][0])
+
+        video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
+
+        return {
+            'id': video_id,
+            # Fall back to the page's og:title when the response has no title.
+            'title': info['title'][0] if info.get('title') else title,
+            'url': video_url,
+            'ext': 'flv',
+            'thumbnail': thumbnail,
+        }
index c4bd53fe7e8f061fee38d9a9a8c371a5901419ea..5f64e7bd0d98b74aea2a4350a51f057b4d0280ba 100644 (file)
@@ -4,9 +4,10 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
     compat_urllib_parse,
     ExtractorError,
+    int_or_none,
+    parse_iso8601,
 )
 
 
@@ -24,6 +25,10 @@ class MixcloudIE(InfoExtractor):
             'uploader': 'Daniel Holbach',
             'uploader_id': 'dholbach',
             'upload_date': '20111115',
+            'timestamp': 1321359578,
+            'thumbnail': 're:https?://.*\.jpg',
+            'view_count': int,
+            'like_count': int,
         },
     }
 
@@ -51,10 +56,6 @@ class MixcloudIE(InfoExtractor):
 
         webpage = self._download_webpage(url, track_id)
 
-        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
-        info = self._download_json(
-            api_url, track_id, 'Downloading cloudcast info')
-
         preview_url = self._search_regex(
             r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
@@ -65,16 +66,41 @@ class MixcloudIE(InfoExtractor):
             template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
             final_song_url = self._get_url(template_url)
         if final_song_url is None:
-            raise ExtractorError(u'Unable to extract track url')
+            raise ExtractorError('Unable to extract track url')
+
+        PREFIX = (
+            r'<div class="cloudcast-play-button-container"'
+            r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
+        title = self._html_search_regex(
+            PREFIX + r'm-title="([^"]+)"', webpage, 'title')
+        thumbnail = self._proto_relative_url(self._html_search_regex(
+            PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
+            fatal=False))
+        uploader = self._html_search_regex(
+            PREFIX + r'm-owner-name="([^"]+)"',
+            webpage, 'uploader', fatal=False)
+        uploader_id = self._search_regex(
+            r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
+        description = self._og_search_description(webpage)
+        like_count = int_or_none(self._search_regex(
+            r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
+            webpage, 'like count', fatal=False))
+        view_count = int_or_none(self._search_regex(
+            r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
+            webpage, 'play count', fatal=False))
+        timestamp = parse_iso8601(self._search_regex(
+            r'<time itemprop="dateCreated" datetime="([^"]+)">',
+            webpage, 'upload date'))
 
         return {
             'id': track_id,
-            'title': info['name'],
+            'title': title,
             'url': final_song_url,
-            'description': info.get('description'),
-            'thumbnail': info['pictures'].get('extra_large'),
-            'uploader': info['user']['name'],
-            'uploader_id': info['user']['username'],
-            'upload_date': unified_strdate(info['created_time']),
-            'view_count': info['play_count'],
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'like_count': like_count,
         }
diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py
new file mode 100644 (file)
index 0000000..b5034b0
--- /dev/null
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+import base64
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class VideoTtIE(InfoExtractor):
+    """Extractor for video.tt; all data comes from the player settings JSON."""
+    # Was misspelled 'ID_NAME', which nothing reads; the other extractors
+    # use IE_NAME for the extractor's name.
+    IE_NAME = 'video.tt'
+    IE_DESC = 'video.tt - Your True Tube'
+    _VALID_URL = r'http://(?:www\.)?video\.tt/(?:video/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
+
+    _TEST = {
+        'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8',
+        'md5': 'b13aa9e2f267effb5d1094443dff65ba',
+        'info_dict': {
+            'id': 'amd5YujV8',
+            'ext': 'flv',
+            'title': 'Motivational video Change your mind in just 2.50 mins',
+            'description': '',
+            'upload_date': '20130827',
+            'uploader': 'joseph313',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict (metadata plus formats) for the video at url."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        settings = self._download_json(
+            'http://www.video.tt/player_control/settings.php?v=%s' % video_id, video_id,
+            'Downloading video JSON')['settings']
+
+        video = settings['video_details']['video']
+
+        # Each 'res' entry has a base64-encoded URL in 'u' and a label in
+        # 'l'; entries with an empty 'u' are skipped.
+        formats = [
+            {
+                'url': base64.b64decode(res['u']).decode('utf-8'),
+                'ext': 'flv',
+                'format_id': res['l'],
+            } for res in settings['res'] if res['u']
+        ]
+
+        return {
+            'id': video_id,
+            'title': video['title'],
+            'description': video['description'],
+            'thumbnail': settings['config']['thumbnail'],
+            'upload_date': unified_strdate(video['added']),
+            'uploader': video['owner'],
+            'view_count': int(video['view_count']),
+            'comment_count': int(video['comment_count']),
+            'like_count': int(video['liked']),
+            'dislike_count': int(video['disliked']),
+            'formats': formats,
+        }
\ No newline at end of file
index 5136ec466e608ca76f7cd68bfca3275d3f012f33..076c87119943f3879845ccc3aaf74cdbebf73859 100644 (file)
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 import re
 import json
+import itertools
 
 from .common import InfoExtractor
 from ..utils import unified_strdate
@@ -58,3 +59,33 @@ class VineIE(InfoExtractor):
             'repost_count': data['reposts']['count'],
             'formats': formats,
         }
+
+
+class VineUserIE(InfoExtractor):
+    """Playlist extractor for a vine.co user page, addressed by vanity name."""
+    IE_NAME = 'vine:user'
+    _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
+    _VINE_BASE_URL = "https://vine.co/"
+
+    def _real_extract(self, url):
+        """Return a playlist of permalink URLs from the user's timeline.
+
+        The vanity name is resolved to a user id via the profile API, then
+        timeline pages are fetched until a page reports no 'nextPage'.
+        """
+        user = re.match(self._VALID_URL, url).group('user')
+
+        profile_data = self._download_json(
+            "%sapi/users/profiles/vanity/%s" % (self._VINE_BASE_URL, user),
+            user, note='Downloading user profile data')
+        user_id = profile_data['data']['userId']
+
+        records = []
+        for pagenum in itertools.count(1):
+            page = self._download_json(
+                "%sapi/timelines/users/%s?page=%s" % (
+                    self._VINE_BASE_URL, user_id, pagenum),
+                user, note='Downloading page %d' % pagenum)
+            records.extend(page['data']['records'])
+            if page['data']['nextPage'] is None:
+                break
+
+        # Every record is delegated to the 'Vine' extractor via its permalink.
+        entries = []
+        for record in records:
+            entries.append(self.url_result(record['permalinkUrl'], 'Vine'))
+        return self.playlist_result(entries, user)
index 63691aa670ec7d3e0f936f7a9444b687dddeb9f3..feeb44b45ff32b4738957e44a6603cde93c8b9a4 100644 (file)
@@ -115,6 +115,34 @@ class WDRIE(InfoExtractor):
         }
 
 
+class WDRMobileIE(InfoExtractor):
+    """Extractor for direct mobile-ondemand.wdr.de media URLs.
+
+    Id, title and age limit are all read from the URL; nothing is downloaded.
+    """
+    IE_NAME = 'wdr:mobile'
+    _VALID_URL = r'''(?x)
+        https?://mobile-ondemand\.wdr\.de/
+        .*?/fsk(?P<age_limit>[0-9]+)
+        /[0-9]+/[0-9]+/
+        (?P<id>[0-9]+)_(?P<title>[0-9]+)'''
+    _TEST = {
+        'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
+        'info_dict': {
+            'title': '4283021',
+            'id': '421735',
+            'age_limit': 0,
+        },
+        '_skip': 'Will be depublicized shortly'
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict built from the named groups of _VALID_URL."""
+        parts = re.match(self._VALID_URL, url).groupdict()
+        info = {
+            'url': url,
+            'user_agent': 'mobile',
+        }
+        info['id'] = parts['id']
+        info['title'] = parts['title']
+        info['age_limit'] = int(parts['age_limit'])
+        return info
+
+
 class WDRMausIE(InfoExtractor):
     _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
     IE_DESC = 'Sendung mit der Maus'
index d6d60691372349cc4cd2d581e9669aa4a88e0f09..89a2f72dc49d63646ea9f3670f86c968977a27ce 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.05.05'
+__version__ = '2014.05.12'