From: Philipp Hagemeister
Date: Wed, 5 Mar 2014 13:01:53 +0000 (+0100)
Subject: Merge remote-tracking branch 'rzhxeo/embedly'
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=f55a1f0a8815c89b01a7a353cfa0bd5118f75829;hp=1b86cc41cf3f7db5893597b4b1956104a77147f7;p=youtube-dl
Merge remote-tracking branch 'rzhxeo/embedly'
Conflicts:
youtube_dl/extractor/generic.py
---
diff --git a/README.md b/README.md
index 49b62f13f..ccd94b2dc 100644
--- a/README.md
+++ b/README.md
@@ -124,8 +124,12 @@ which means you can modify it, redistribute it or use it however you like.
video id, %(playlist)s for the playlist the
video is in, %(playlist_index)s for the
position in the playlist and %% for a
- literal percent. Use - to output to stdout.
- Can also be used to download to a different
+ literal percent. %(height)s and %(width)s
+ for the height and width of the video
+ format. %(resolution)s for a textual
+ description of the resolution of the video
+ format. Use - to output to stdout. Can also
+ be used to download to a different
directory, for example with -o '/my/downloa
ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
--autonumber-size NUMBER Specifies the number of digits in
diff --git a/devscripts/release.sh b/devscripts/release.sh
index 323acf8cf..72e708c7f 100755
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -14,9 +14,9 @@
set -e
-skip_tests=false
-if [ "$1" = '--skip-test' ]; then
- skip_tests=true
+skip_tests=true
+if [ "$1" = '--run-tests' ]; then
+ skip_tests=false
shift
fi
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 7a78005a3..5c17a39fb 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_truncated(self):
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
+ def test_youtube_search_matching(self):
+ self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+ self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+
def test_justin_tv_channelid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
diff --git a/test/test_download.py b/test/test_download.py
index 7587a18aa..bbbb6b78a 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -18,6 +18,7 @@ from test.helper import (
import hashlib
import io
import json
+import re
import socket
import youtube_dl.YoutubeDL
@@ -72,9 +73,7 @@ def generator(test_case):
if 'playlist' not in test_case:
info_dict = test_case.get('info_dict', {})
if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
- print_skipping('The output file cannot be know, the "file" '
- 'key is missing or the info_dict is incomplete')
- return
+ raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
if 'skip' in test_case:
print_skipping(test_case['skip'])
return
@@ -137,12 +136,21 @@ def generator(test_case):
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items():
- if isinstance(expected, compat_str) and expected.startswith('md5:'):
- got = 'md5:' + md5(info_dict.get(info_field))
- else:
+ if isinstance(expected, compat_str) and expected.startswith('re:'):
got = info_dict.get(info_field)
- self.assertEqual(expected, got,
- u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+ match_str = expected[len('re:'):]
+ match_rex = re.compile(match_str)
+
+ self.assertTrue(
+ isinstance(got, compat_str) and match_rex.match(got),
+ u'field %s (value: %r) should match %r' % (info_field, got, match_str))
+ else:
+ if isinstance(expected, compat_str) and expected.startswith('md5:'):
+ got = 'md5:' + md5(info_dict.get(info_field))
+ else:
+ got = info_dict.get(info_field)
+ self.assertEqual(expected, got,
+ u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# If checkable fields are missing from the test case, print the info_dict
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
diff --git a/test/test_playlists.py b/test/test_playlists.py
index 25bec9f1c..4bd815a0e 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -36,6 +36,7 @@ from youtube_dl.extractor import (
RutubeChannelIE,
GoogleSearchIE,
GenericIE,
+ TEDIE,
)
@@ -170,12 +171,12 @@ class TestPlaylists(unittest.TestCase):
def test_AcademicEarthCourse(self):
dl = FakeYDL()
ie = AcademicEarthCourseIE(dl)
- result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/')
+ result = ie.extract('http://academicearth.org/playlists/laws-of-nature/')
self.assertIsPlaylist(result)
- self.assertEqual(result['id'], 'building-dynamic-websites')
- self.assertEqual(result['title'], 'Building Dynamic Websites')
- self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
- self.assertEqual(len(result['entries']), 10)
+ self.assertEqual(result['id'], 'laws-of-nature')
+ self.assertEqual(result['title'], 'Laws of Nature')
+ self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')
+ self.assertEqual(len(result['entries']), 4)
def test_ivi_compilation(self):
dl = FakeYDL()
@@ -259,5 +260,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], 'Zero Punctuation')
self.assertTrue(len(result['entries']) > 10)
+ def test_ted_playlist(self):
+ dl = FakeYDL()
+ ie = TEDIE(dl)
+ result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], '10')
+ self.assertEqual(result['title'], 'Who are the hackers?')
+ self.assertTrue(len(result['entries']) >= 6)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_utils.py b/test/test_utils.py
index 84553b943..4e3c37fb4 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Various small unit tests
+import io
import xml.etree.ElementTree
#from youtube_dl.utils import htmlentity_transform
@@ -21,6 +22,7 @@ from youtube_dl.utils import (
orderedSet,
PagedList,
parse_duration,
+ read_batch_urls,
sanitize_filename,
shell_quote,
smuggle_url,
@@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase):
def test_struct_unpack(self):
self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
+ def test_read_batch_urls(self):
+ f = io.StringIO(u'''\xef\xbb\xbf foo
+ bar\r
+ baz
+ # More after this line\r
+ ; or after this
+ bam''')
+ self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 38ac989ce..7d3b9c705 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -16,6 +16,7 @@ from youtube_dl.extractor import (
YoutubeChannelIE,
YoutubeShowIE,
YoutubeTopListIE,
+ YoutubeSearchURLIE,
)
@@ -118,6 +119,8 @@ class TestYoutubeLists(unittest.TestCase):
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
def test_youtube_toptracks(self):
+ print('Skipping: The playlist page gives error 500')
+ return
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
@@ -131,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase):
entries = result['entries']
self.assertTrue(len(entries) >= 5)
+ def test_youtube_search_url(self):
+ dl = FakeYDL()
+ ie = YoutubeSearchURLIE(dl)
+ result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
+ entries = result['entries']
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['title'], 'youtube-dl test video')
+ self.assertTrue(len(entries) >= 5)
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 42cbcf699..62ccad20c 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -409,6 +409,13 @@ class YoutubeDL(object):
template_dict['autonumber'] = autonumber_templ % self._num_downloads
if template_dict.get('playlist_index') is not None:
template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
+ if template_dict.get('resolution') is None:  # derive %(resolution)s from whichever dimensions are known
+ if template_dict.get('width') and template_dict.get('height'):
+ template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
+ elif template_dict.get('height'):
+ template_dict['resolution'] = '%sp' % template_dict['height']  # height only, e.g. 720p
+ elif template_dict.get('width'):
+ template_dict['resolution'] = '%dx?' % template_dict['width']  # width only, height unknown
sanitize = lambda k, v: sanitize_filename(
compat_str(v),
@@ -675,6 +682,9 @@ class YoutubeDL(object):
info_dict['playlist'] = None
info_dict['playlist_index'] = None
+ if 'display_id' not in info_dict and 'id' in info_dict:
+ info_dict['display_id'] = info_dict['id']
+
# This extractors handle format selection themselves
if info_dict['extractor'] in ['Youku']:
if download:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 70608066c..d39eb830f 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -46,12 +46,17 @@ __authors__ = (
'Andreas Schmitz',
'Michael Kaiser',
'Niklas Laxström',
+ 'David Triendl',
+ 'Anthony Weems',
+ 'David Wagner',
+ 'Juan C. Olivares',
)
__license__ = 'Public Domain'
import codecs
import getpass
+import io
import locale
import optparse
import os
@@ -70,6 +75,7 @@ from .utils import (
get_cachedir,
MaxDownloadsReached,
preferredencoding,
+ read_batch_urls,
SameFileError,
setproctitle,
std_headers,
@@ -424,6 +430,8 @@ def parseOpts(overrideArguments=None):
'%(extractor)s for the provider (youtube, metacafe, etc), '
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
+ '%(height)s and %(width)s for the height and width of the video format. '
+ '%(resolution)s for a textual description of the resolution of the video format. '
'Use - to output to stdout. Can also be used to download to a different directory, '
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
filesystem.add_option('--autonumber-size',
@@ -551,21 +559,19 @@ def _real_main(argv=None):
sys.exit(0)
# Batch file verification
- batchurls = []
+ batch_urls = []
if opts.batchfile is not None:
try:
if opts.batchfile == '-':
batchfd = sys.stdin
else:
- batchfd = open(opts.batchfile, 'r')
- batchurls = batchfd.readlines()
- batchurls = [x.strip() for x in batchurls]
- batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
+ batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
+ batch_urls = read_batch_urls(batchfd)
if opts.verbose:
- write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+ write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
except IOError:
sys.exit(u'ERROR: batch file could not be read')
- all_urls = batchurls + args
+ all_urls = batch_urls + args
all_urls = [url.strip() for url in all_urls]
_enc = preferredencoding()
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 2a870a758..4e6abfe10 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -12,7 +12,6 @@ from .http import HttpFD
from ..utils import (
struct_pack,
struct_unpack,
- compat_urllib_request,
compat_urlparse,
format_bytes,
encodeFilename,
@@ -117,8 +116,8 @@ class FlvReader(io.BytesIO):
self.read_unsigned_char()
# flags
self.read(3)
- # BootstrapinfoVersion
- bootstrap_info_version = self.read_unsigned_int()
+
+ self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
self.read(1)
# time scale
@@ -127,15 +126,15 @@ class FlvReader(io.BytesIO):
self.read_unsigned_long_long()
# SmpteTimeCodeOffset
self.read_unsigned_long_long()
- # MovieIdentifier
- movie_identifier = self.read_string()
+
+ self.read_string() # MovieIdentifier
server_count = self.read_unsigned_char()
# ServerEntryTable
for i in range(server_count):
self.read_string()
quality_count = self.read_unsigned_char()
# QualityEntryTable
- for i in range(server_count):
+ for i in range(quality_count):
self.read_string()
# DrmData
self.read_string()
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 748f9f3ad..240ecb606 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -85,6 +85,7 @@ class HttpFD(FileDownloader):
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
+ resume_len = 0
open_mode = 'wb'
break
# Retry
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 229bdc595..f35ee4941 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -19,12 +19,15 @@ from .bbccouk import BBCCoUkIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
+from .br import BRIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .c56 import C56IE
+from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
+from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
from .cinemassacre import CinemassacreIE
@@ -88,6 +91,7 @@ from .funnyordie import FunnyOrDieIE
from .gamekings import GamekingsIE
from .gamespot import GameSpotIE
from .gametrailers import GametrailersIE
+from .gdcvault import GDCVaultIE
from .generic import GenericIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
@@ -132,11 +136,12 @@ from .lynda import (
)
from .m6 import M6IE
from .macgamestore import MacGameStoreIE
+from .mailru import MailRuIE
from .malemotion import MalemotionIE
from .mdr import MDRIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
-from .mit import TechTVMITIE, MITIE
+from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mixcloud import MixcloudIE
from .mpora import MporaIE
from .mofosex import MofosexIE
@@ -151,7 +156,10 @@ from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .naver import NaverIE
from .nba import NBAIE
-from .nbc import NBCNewsIE
+from .nbc import (
+ NBCIE,
+ NBCNewsIE,
+)
from .ndr import NDRIE
from .ndtv import NDTVIE
from .newgrounds import NewgroundsIE
@@ -160,7 +168,7 @@ from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
from .ninegag import NineGagIE
from .normalboots import NormalbootsIE
-from .novamov import NovamovIE
+from .novamov import NovaMovIE
from .nowness import NownessIE
from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE
@@ -171,6 +179,7 @@ from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import PornHubIE
from .pornotube import PornotubeIE
+from .prosiebensat1 import ProSiebenSat1IE
from .pyvideo import PyvideoIE
from .radiofrance import RadioFranceIE
from .rbmaradio import RBMARadioIE
@@ -225,10 +234,12 @@ from .tinypic import TinyPicIE
from .toutv import TouTvIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
+from .trutube import TruTubeIE
from .tube8 import Tube8IE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
+from .tvigle import TvigleIE
from .tvp import TvpIE
from .unistra import UnistraIE
from .ustream import UstreamIE, UstreamChannelIE
@@ -239,6 +250,7 @@ from .vesti import VestiIE
from .vevo import VevoIE
from .vice import ViceIE
from .viddler import ViddlerIE
+from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
@@ -273,19 +285,20 @@ from .youku import YoukuIE
from .youporn import YouPornIE
from .youtube import (
YoutubeIE,
+ YoutubeChannelIE,
+ YoutubeFavouritesIE,
+ YoutubeHistoryIE,
YoutubePlaylistIE,
- YoutubeSearchIE,
+ YoutubeRecommendedIE,
YoutubeSearchDateIE,
- YoutubeUserIE,
- YoutubeChannelIE,
+ YoutubeSearchIE,
+ YoutubeSearchURLIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
- YoutubeRecommendedIE,
+ YoutubeTopListIE,
YoutubeTruncatedURLIE,
+ YoutubeUserIE,
YoutubeWatchLaterIE,
- YoutubeFavouritesIE,
- YoutubeHistoryIE,
- YoutubeTopListIE,
)
from .zdf import ZDFIE
diff --git a/youtube_dl/extractor/academicearth.py b/youtube_dl/extractor/academicearth.py
index 72f81d01a..59d3bbba4 100644
--- a/youtube_dl/extractor/academicearth.py
+++ b/youtube_dl/extractor/academicearth.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
+ _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
IE_NAME = 'AcademicEarth:Course'
def _real_extract(self, url):
@@ -14,12 +14,12 @@ class AcademicEarthCourseIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
title = self._html_search_regex(
- r'
', webpage)
if m_msg is not None:
raise ExtractorError(
- u'The video is not available, Facebook said: "%s"' % m_msg.group(1),
+ 'The video is not available, Facebook said: "%s"' % m_msg.group(1),
expected=True)
else:
- raise ExtractorError(u'Cannot parse data')
+ raise ExtractorError('Cannot parse data')
data = dict(json.loads(m.group(1)))
params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw)
@@ -119,12 +124,12 @@ class FacebookIE(InfoExtractor):
if not video_url:
video_url = video_data['sd_src']
if not video_url:
- raise ExtractorError(u'Cannot find video URL')
+ raise ExtractorError('Cannot find video URL')
video_duration = int(video_data['video_duration'])
thumbnail = video_data['thumbnail_src']
video_title = self._html_search_regex(
- r'