Merge pull request #2681 from phihag/readme-dev-instructions
author Philipp Hagemeister <phihag@phihag.de>
Thu, 3 Apr 2014 21:06:15 +0000 (23:06 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 3 Apr 2014 21:06:15 +0000 (23:06 +0200)
[README] Improve developer instructions

test/test_all_urls.py
youtube_dl/YoutubeDL.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/c56.py
youtube_dl/extractor/cnet.py [new file with mode: 0644]
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/wimp.py
youtube_dl/extractor/youtube.py
youtube_dl/utils.py
youtube_dl/version.py

index ed041ffda0614a956b5410b011b9d784b3b618c4..bea8c41fbf80e3171b47fab81f08e24c7ad35bda 100644 (file)
@@ -153,6 +153,9 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch(
             'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
             ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
+            ['ComedyCentralShows'])
 
 if __name__ == '__main__':
     unittest.main()
index 430773edda28e5847ae7f702e40655c692c402a4..5794fdbe9f16897357892db883beef88e9f3a1e5 100644 (file)
@@ -876,7 +876,7 @@ class YoutubeDL(object):
 
         try:
             dn = os.path.dirname(encodeFilename(filename))
-            if dn != '' and not os.path.exists(dn):
+            if dn and not os.path.exists(dn):
                 os.makedirs(dn)
         except (OSError, IOError) as err:
             self.report_error('unable to create directory ' + compat_str(err))
index 7c3587e472f37a70d992c0ade4538679e7017125..c9c400b61f5a35fb1098b8dc27610f85d0ef78e2 100644 (file)
@@ -40,6 +40,7 @@ from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
 from .cmt import CMTIE
+from .cnet import CNETIE
 from .cnn import (
     CNNIE,
     CNNBlogsIE,
index 690bc7c25fe2574faa473b122e8427137599c3cd..cb96c3876b7cbf02220d06ad86a44414d69c9fa8 100644 (file)
@@ -2,39 +2,46 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 
 
 class C56IE(InfoExtractor):
-    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
+    _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
     IE_NAME = '56.com'
     _TEST = {
         'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
-        'file': '93440716.flv',
         'md5': 'e59995ac63d0457783ea05f93f12a866',
         'info_dict': {
+            'id': '93440716',
+            'ext': 'flv',
             'title': '网事知多少 第32期:车怒',
+            'duration': 283.813,
         },
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
         text_id = mobj.group('textid')
-        info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
-                                           text_id, 'Downloading video info')
-        info = json.loads(info_page)['info']
-        formats = [{
-            'format_id': f['type'],
-            'filesize': int(f['filesize']),
-            'url': f['url']
-        } for f in info['rfiles']]
+
+        page = self._download_json(
+            'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
+
+        info = page['info']
+
+        formats = [
+            {
+                'format_id': f['type'],
+                'filesize': int(f['filesize']),
+                'url': f['url']
+            } for f in info['rfiles']
+        ]
         self._sort_formats(formats)
 
         return {
             'id': info['vid'],
             'title': info['Subject'],
+            'duration': int(info['duration']) / 1000.0,
             'formats': formats,
             'thumbnail': info.get('bimg') or info.get('img'),
         }
diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py
new file mode 100644 (file)
index 0000000..6a2f5ce
--- /dev/null
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+)
+
+
+class CNETIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
+    _TEST = {
+        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
+        'md5': '041233212a0d06b179c87cbcca1577b8',
+        'info_dict': {
+            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
+            'ext': 'mp4',
+            'title': 'Hands-on with Microsoft Windows 8.1 Update',
+            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
+            'thumbnail': 're:^http://.*/flmswindows8.jpg$',
+            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
+            'uploader': 'Sarah Mitroff',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, display_id)
+        data_json = self._html_search_regex(
+            r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
+            webpage, 'data json')
+        data = json.loads(data_json)
+        vdata = data['video']
+
+        video_id = vdata['id']
+        title = vdata['headline']
+        description = vdata.get('dek')
+        thumbnail = vdata.get('image', {}).get('path')
+        author = vdata.get('author')
+        if author:
+            uploader = '%s %s' % (author['firstName'], author['lastName'])
+            uploader_id = author.get('email')
+        else:
+            uploader = None
+            uploader_id = None
+
+        formats = [{
+            'format_id': '%s-%s-%s' % (
+                f['type'], f['format'],
+                int_or_none(f.get('bitrate'), 1000, default='')),
+            'url': f['uri'],
+            'tbr': int_or_none(f.get('bitrate'), 1000),
+        } for f in vdata['files']['data']]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'thumbnail': thumbnail,
+        }
index 38577243caa475ca8f5028407e53c51d0e061487..0c99887a2e4ef7f3d1b2a75f605553119da16b3b 100644 (file)
@@ -41,7 +41,7 @@ class ComedyCentralShowsIE(InfoExtractor):
     _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                       |https?://(:www\.)?
                           (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
-                         (full-episodes/(?P<episode>.*)|
+                         (full-episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
                           (?P<clip>
                               (?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+))
                               |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
index 79fd53e0c8e85daae8efd86b70dc1302c1c0a629..c27dda9440e62274e13b9359f24c2a909516b4bc 100644 (file)
@@ -3,11 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from .youtube import YoutubeIE
 
 
 class WimpIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.wimp.com/maruexhausted/',
         'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
         'info_dict': {
@@ -16,7 +17,20 @@ class WimpIE(InfoExtractor):
             'title': 'Maru is exhausted.',
             'description': 'md5:57e099e857c0a4ea312542b684a869b8',
         }
-    }
+    }, {
+        # youtube video
+        'url': 'http://www.wimp.com/clowncar/',
+        'info_dict': {
+            'id': 'cG4CEr2aiSg',
+            'ext': 'mp4',
+            'title': 'Basset hound clown car...incredible!',
+            'description': 'md5:8d228485e0719898c017203f900b3a35',
+            'uploader': 'Gretchen Hoey',
+            'uploader_id': 'gretchenandjeff1',
+            'upload_date': '20140303',
+        },
+        'add_ie': ['Youtube'],
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -24,6 +38,13 @@ class WimpIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         video_url = self._search_regex(
             r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL')
+        if YoutubeIE.suitable(video_url):
+            self.to_screen('Found YouTube video')
+            return {
+                '_type': 'url',
+                'url': video_url,
+                'ie_key': YoutubeIE.ie_key(),
+            }
 
         return {
             'id': video_id,
@@ -31,4 +52,4 @@ class WimpIE(InfoExtractor):
             'title': self._og_search_title(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
             'description': self._og_search_description(webpage),
-        }
\ No newline at end of file
+        }
index b6a1884b5c4475832b0991a0709ba06f41731dc1..e422d86e4fe6dda98c8e61b5d9c0a0c9ed3d50e6 100644 (file)
@@ -1753,7 +1753,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
 
 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
     _FEED_NAME = 'subscriptions'
     _PLAYLIST_TITLE = u'Youtube Subscriptions'
index e54ea9d617e217686691312ef53d5a42e71df03c..5f1f664c88f609868839ebf3f1db043a1c69d1d9 100644 (file)
@@ -1176,12 +1176,12 @@ class HEADRequest(compat_urllib_request.Request):
         return "HEAD"
 
 
-def int_or_none(v, scale=1):
-    return v if v is None else (int(v) // scale)
+def int_or_none(v, scale=1, default=None):
+    return default if v is None else (int(v) // scale)
 
 
-def float_or_none(v, scale=1):
-    return v if v is None else (float(v) / scale)
+def float_or_none(v, scale=1, default=None):
+    return default if v is None else (float(v) / scale)
 
 
 def parse_duration(s):
index ea24d76e64e616306927d601e49cd581588758cb..ac34122d94299919192201024c793b6d83250bc0 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.04.03.1'
+__version__ = '2014.04.03.3'