From: Philipp Hagemeister Date: Fri, 4 Apr 2014 20:44:49 +0000 (+0200) Subject: Merge remote-tracking branch 'AGSPhoenix/teamcoco-fix' X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=b84d6e7fc42affddeb1baf989cf394fedc41a96d;hp=fa387d2d99b837d827a9a8b8996d245dd3d191c4 Merge remote-tracking branch 'AGSPhoenix/teamcoco-fix' --- diff --git a/MANIFEST.in b/MANIFEST.in index 8f8af7a7f..d43cc1f3b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,3 +3,4 @@ include test/*.py include test/*.json include youtube-dl.bash-completion include youtube-dl.1 +recursive-include docs Makefile conf.py *.rst diff --git a/Makefile b/Makefile index c6d09932b..f7d917d09 100644 --- a/Makefile +++ b/Makefile @@ -72,8 +72,9 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude '__pycache' \ --exclude '.git' \ --exclude 'testdata' \ + --exclude 'docs/_build' \ -- \ - bin devscripts test youtube_dl \ + bin devscripts test youtube_dl docs \ CHANGELOG LICENSE README.md README.txt \ Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \ youtube-dl diff --git a/README.md b/README.md index a10b13055..1ba1486d2 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,9 @@ which means you can modify it, redistribute it or use it however you like. --user-agent UA specify a custom user agent --referer REF specify a custom referer, use if the video access is restricted to one domain + --add-header FIELD:VALUE specify a custom HTTP header and its value, + separated by a colon ':'. You can use this + option multiple times --list-extractors List all supported extractors and the URLs they would handle --extractor-descriptions Output descriptions of all supported @@ -62,6 +65,7 @@ which means you can modify it, redistribute it or use it however you like. 
configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows) + --encoding ENCODING Force the specified encoding (experimental) ## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) @@ -166,6 +170,7 @@ which means you can modify it, redistribute it or use it however you like. ## Verbosity / Simulation Options: -q, --quiet activates quiet mode + --no-warnings Ignore warnings -s, --simulate do not download the video and do not write anything to disk --skip-download do not download the video @@ -177,7 +182,9 @@ which means you can modify it, redistribute it or use it however you like. --get-duration simulate, quiet but print video length --get-filename simulate, quiet but print output filename --get-format simulate, quiet but print output format - -j, --dump-json simulate, quiet but print JSON information + -j, --dump-json simulate, quiet but print JSON information. + See --output for a description of available + keys. --newline output progress bar as new lines --no-progress do not print progress bar --console-title display progress in console titlebar @@ -364,7 +371,67 @@ If you want to create a build of youtube-dl yourself, you'll need ### Adding support for a new site -If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! 
For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/). +If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`): + +1. [Fork this repository](https://github.com/rg3/youtube-dl/fork) +2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git` +3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor` +4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`: + + # coding: utf-8 + from __future__ import unicode_literals + + import re + + from .common import InfoExtractor + + + class YourExtractorIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://yourextractor.com/watch/42', + 'md5': 'TODO: md5 sum of the first 10KiB of the video file', + 'info_dict': { + 'id': '42', + 'ext': 'mp4', + 'title': 'Video title goes here', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + # TODO more code goes here, for example ... + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'

<h1>(.*?)</h1>

', webpage, 'title') + + return { + 'id': video_id, + 'title': title, + # TODO more properties (see youtube_dl/extractor/common.py) + } + + +5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). +6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. +7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. +8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501). +9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this: + + $ git add youtube_dl/extractor/__init__.py + $ git add youtube_dl/extractor/yourextractor.py + $ git commit -m '[yourextractor] Add new extractor' + $ git push origin yourextractor + +10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. + +In any case, thank you very much for your contributions! 
# BUGS diff --git a/devscripts/release.sh b/devscripts/release.sh index aa3119c42..2974a7c3e 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -22,6 +22,12 @@ fi if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi version="$1" +major_version=$(echo "$version" | sed -n 's#^\([0-9]*\.[0-9]*\.[0-9]*\).*#\1#p') +if test "$major_version" '!=' "$(date '+%Y.%m.%d')"; then + echo "$version does not start with today's date!" + exit 1 +fi + if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi useless_files=$(find youtube_dl -type f -not -name '*.py') diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 000000000..69fa449dd --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1 @@ +_build/ diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..712218045 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,177 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
+# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
+ +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/youtube-dl.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/youtube-dl.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/youtube-dl" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/youtube-dl" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
+ +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 
+ +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 000000000..4a04ad779 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +# +# youtube-dl documentation build configuration file, created by +# sphinx-quickstart on Fri Mar 14 21:05:43 2014. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os +# Allows to import youtube_dl +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# -- General configuration ------------------------------------------------ + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'youtube-dl' +copyright = u'2014, Ricardo Garcia Gonzalez' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +import youtube_dl +version = youtube_dl.__version__ +# The full version, including alpha/beta/rc tags. +release = version + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. 
+exclude_patterns = ['_build'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Output file base name for HTML help builder. +htmlhelp_basename = 'youtube-dldoc' diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000..b746ff95b --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,23 @@ +Welcome to youtube-dl's documentation! +====================================== + +*youtube-dl* is a command-line program to download videos from YouTube.com and more sites. +It can also be used in Python code. + +Developer guide +--------------- + +This section contains information for using *youtube-dl* from Python programs. + +.. toctree:: + :maxdepth: 2 + + module_guide + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/module_guide.rst b/docs/module_guide.rst new file mode 100644 index 000000000..03d72882e --- /dev/null +++ b/docs/module_guide.rst @@ -0,0 +1,67 @@ +Using the ``youtube_dl`` module +=============================== + +When using the ``youtube_dl`` module, you start by creating an instance of :class:`YoutubeDL` and adding all the available extractors: + +.. code-block:: python + + >>> from youtube_dl import YoutubeDL + >>> ydl = YoutubeDL() + >>> ydl.add_default_info_extractors() + +Extracting video information +---------------------------- + +You use the :meth:`YoutubeDL.extract_info` method for getting the video information, which returns a dictionary: + +.. 
code-block:: python + + >>> info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc', download=False) + [youtube] Setting language + [youtube] BaW_jenozKc: Downloading webpage + [youtube] BaW_jenozKc: Downloading video info webpage + [youtube] BaW_jenozKc: Extracting video information + >>> info['title'] + 'youtube-dl test video "\'/\\ä↭𝕐' + >>> info['height'], info['width'] + (720, 1280) + +If you want to download or play the video you can get its url: + +.. code-block:: python + + >>> info['url'] + 'https://...' + +Extracting playlist information +------------------------------- + +The playlist information is extracted in a similar way, but the dictionary is a bit different: + +.. code-block:: python + + >>> playlist = ydl.extract_info('http://www.ted.com/playlists/13/open_source_open_world', download=False) + [TED] open_source_open_world: Downloading playlist webpage + ... + >>> playlist['title'] + 'Open-source, open world' + + + +You can access the videos in the playlist with the ``entries`` field: + +.. code-block:: python + + >>> for video in playlist['entries']: + ... 
print('Video #%d: %s' % (video['playlist_index'], video['title'])) + + Video #1: How Arduino is open-sourcing imagination + Video #2: The year open data went worldwide + Video #3: Massive-scale online collaboration + Video #4: The art of asking + Video #5: How cognitive surplus will change the world + Video #6: The birth of Wikipedia + Video #7: Coding a better government + Video #8: The era of open innovation + Video #9: The currency of the new economy is trust + diff --git a/test/helper.py b/test/helper.py index 17de951c5..8739f816c 100644 --- a/test/helper.py +++ b/test/helper.py @@ -9,7 +9,10 @@ import sys import youtube_dl.extractor from youtube_dl import YoutubeDL -from youtube_dl.utils import preferredencoding +from youtube_dl.utils import ( + compat_str, + preferredencoding, +) def get_params(override=None): @@ -83,3 +86,45 @@ def gettestcases(): md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() + + +def expect_info_dict(self, expected_dict, got_dict): + for info_field, expected in expected_dict.items(): + if isinstance(expected, compat_str) and expected.startswith('re:'): + got = got_dict.get(info_field) + match_str = expected[len('re:'):] + match_rex = re.compile(match_str) + + self.assertTrue( + isinstance(got, compat_str) and match_rex.match(got), + u'field %s (value: %r) should match %r' % (info_field, got, match_str)) + elif isinstance(expected, type): + got = got_dict.get(info_field) + self.assertTrue(isinstance(got, expected), + u'Expected type %r, but got value %r of type %r' % (expected, got, type(got))) + else: + if isinstance(expected, compat_str) and expected.startswith('md5:'): + got = 'md5:' + md5(got_dict.get(info_field)) + else: + got = got_dict.get(info_field) + self.assertEqual(expected, got, + u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) + + # Check for the presence of mandatory fields + for key in ('id', 'url', 'title', 'ext'): + self.assertTrue(got_dict.get(key), 'Missing mandatory field 
%s' % key) + # Check for mandatory fields that are automatically set by YoutubeDL + for key in ['webpage_url', 'extractor', 'extractor_key']: + self.assertTrue(got_dict.get(key), u'Missing field: %s' % key) + + # Are checkable fields missing from the test case definition? + test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) + for key, value in got_dict.items() + if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) + missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) + if missing_keys: + sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') + self.assertFalse( + missing_keys, + 'Missing keys in test definition: %s' % ( + ', '.join(sorted(missing_keys)))) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 1f3ccaea0..2902dbec7 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -26,16 +26,27 @@ class YDL(FakeYDL): self.msgs.append(msg) +def _make_result(formats, **kwargs): + res = { + 'formats': formats, + 'id': 'testid', + 'title': 'testttitle', + 'extractor': 'testex', + } + res.update(**kwargs) + return res + + class TestFormatSelection(unittest.TestCase): def test_prefer_free_formats(self): # Same resolution => download webm ydl = YDL() ydl.params['prefer_free_formats'] = True formats = [ - {'ext': 'webm', 'height': 460}, - {'ext': 'mp4', 'height': 460}, + {'ext': 'webm', 'height': 460, 'url': 'x'}, + {'ext': 'mp4', 'height': 460, 'url': 'y'}, ] - info_dict = {'formats': formats, 'extractor': 'test'} + info_dict = _make_result(formats) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) @@ -46,8 +57,8 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL() ydl.params['prefer_free_formats'] = True formats = [ - {'ext': 'webm', 'height': 720}, - {'ext': 'mp4', 'height': 1080}, + {'ext': 'webm', 
'height': 720, 'url': 'a'}, + {'ext': 'mp4', 'height': 1080, 'url': 'b'}, ] info_dict['formats'] = formats yie = YoutubeIE(ydl) @@ -60,9 +71,9 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL() ydl.params['prefer_free_formats'] = False formats = [ - {'ext': 'webm', 'height': 720}, - {'ext': 'mp4', 'height': 720}, - {'ext': 'flv', 'height': 720}, + {'ext': 'webm', 'height': 720, 'url': '_'}, + {'ext': 'mp4', 'height': 720, 'url': '_'}, + {'ext': 'flv', 'height': 720, 'url': '_'}, ] info_dict['formats'] = formats yie = YoutubeIE(ydl) @@ -74,8 +85,8 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL() ydl.params['prefer_free_formats'] = False formats = [ - {'ext': 'flv', 'height': 720}, - {'ext': 'webm', 'height': 720}, + {'ext': 'flv', 'height': 720, 'url': '_'}, + {'ext': 'webm', 'height': 720, 'url': '_'}, ] info_dict['formats'] = formats yie = YoutubeIE(ydl) @@ -91,8 +102,7 @@ class TestFormatSelection(unittest.TestCase): {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3}, {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4}, ] - info_dict = { - 'formats': formats, 'extractor': 'test', 'id': 'testvid'} + info_dict = _make_result(formats) ydl = YDL() ydl.process_ie_result(info_dict) @@ -120,12 +130,12 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection(self): formats = [ - {'format_id': '35', 'ext': 'mp4', 'preference': 1}, - {'format_id': '45', 'ext': 'webm', 'preference': 2}, - {'format_id': '47', 'ext': 'webm', 'preference': 3}, - {'format_id': '2', 'ext': 'flv', 'preference': 4}, + {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'}, + {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'}, + {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'}, + {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'}, ] - info_dict = {'formats': formats, 'extractor': 'test'} + info_dict = _make_result(formats) ydl = YDL({'format': '20/47'}) 
ydl.process_ie_result(info_dict.copy()) @@ -154,12 +164,12 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection_audio(self): formats = [ - {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'}, - {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'}, - {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'}, - {'format_id': 'vid', 'ext': 'mp4', 'preference': 4}, + {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'}, + {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'}, + {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'}, + {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'}, ] - info_dict = {'formats': formats, 'extractor': 'test'} + info_dict = _make_result(formats) ydl = YDL({'format': 'bestaudio'}) ydl.process_ie_result(info_dict.copy()) @@ -172,10 +182,10 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(downloaded['format_id'], 'audio-low') formats = [ - {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1}, - {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2}, + {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'}, + {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'}, ] - info_dict = {'formats': formats, 'extractor': 'test'} + info_dict = _make_result(formats) ydl = YDL({'format': 'bestaudio/worstaudio/best'}) ydl.process_ie_result(info_dict.copy()) @@ -184,11 +194,11 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection_video(self): formats = [ - {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'}, - {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'}, - {'format_id': 'vid', 'ext': 'mp4', 'preference': 3}, + {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'}, + 
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'}, + {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'}, ] - info_dict = {'formats': formats, 'extractor': 'test'} + info_dict = _make_result(formats) ydl = YDL({'format': 'bestvideo'}) ydl.process_ie_result(info_dict.copy()) @@ -217,10 +227,12 @@ class TestFormatSelection(unittest.TestCase): for f1id, f2id in zip(order, order[1:]): f1 = YoutubeIE._formats[f1id].copy() f1['format_id'] = f1id + f1['url'] = 'url:' + f1id f2 = YoutubeIE._formats[f2id].copy() f2['format_id'] = f2id + f2['url'] = 'url:' + f2id - info_dict = {'formats': [f1, f2], 'extractor': 'youtube'} + info_dict = _make_result([f1, f2], extractor='youtube') ydl = YDL() yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) @@ -228,7 +240,7 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], f1id) - info_dict = {'formats': [f2, f1], 'extractor': 'youtube'} + info_dict = _make_result([f2, f1], extractor='youtube') ydl = YDL() yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 39ac8b8a1..577f6ac32 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -143,5 +143,25 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS']) + def test_ComedyCentralShows(self): + self.assertMatch( + 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', + ['ComedyCentralShows']) + self.assertMatch( + 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news', + ['ComedyCentralShows']) + self.assertMatch( + 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', + ['ComedyCentralShows']) + self.assertMatch( + 
'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3', + ['ComedyCentralShows']) + + def test_yahoo_https(self): + # https://github.com/rg3/youtube-dl/issues/2701 + self.assertMatch( + 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', + ['Yahoo']) + if __name__ == '__main__': unittest.main() diff --git a/test/test_download.py b/test/test_download.py index 815f5bb09..f171c10ba 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -9,16 +9,16 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import ( get_params, gettestcases, - try_rm, + expect_info_dict, md5, - report_warning + try_rm, + report_warning, ) import hashlib import io import json -import re import socket import youtube_dl.YoutubeDL @@ -135,45 +135,8 @@ def generator(test_case): self.assertEqual(md5_for_file, tc['md5']) with io.open(info_json_fn, encoding='utf-8') as infof: info_dict = json.load(infof) - for (info_field, expected) in tc.get('info_dict', {}).items(): - if isinstance(expected, compat_str) and expected.startswith('re:'): - got = info_dict.get(info_field) - match_str = expected[len('re:'):] - match_rex = re.compile(match_str) - - self.assertTrue( - isinstance(got, compat_str) and match_rex.match(got), - u'field %s (value: %r) should match %r' % (info_field, got, match_str)) - elif isinstance(expected, type): - got = info_dict.get(info_field) - self.assertTrue(isinstance(got, expected), - u'Expected type %r, but got value %r of type %r' % (expected, got, type(got))) - else: - if isinstance(expected, compat_str) and expected.startswith('md5:'): - got = 'md5:' + md5(info_dict.get(info_field)) - else: - got = info_dict.get(info_field) - self.assertEqual(expected, got, - u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) - - # Check for the presence of mandatory fields - for key in ('id', 'url', 'title', 'ext'): - 
self.assertTrue(key in info_dict.keys() and info_dict[key]) - # Check for mandatory fields that are automatically set by YoutubeDL - for key in ['webpage_url', 'extractor', 'extractor_key']: - self.assertTrue(info_dict.get(key), u'Missing field: %s' % key) - - # Are checkable fields missing from the test case definition? - test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) - for key, value in info_dict.items() - if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) - missing_keys = set(test_info_dict.keys()) - set(tc.get('info_dict', {}).keys()) - if missing_keys: - sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n') - self.assertFalse( - missing_keys, - 'Missing keys in test definition: %s' % ( - ','.join(sorted(missing_keys)))) + + expect_info_dict(self, tc.get('info_dict', {}), info_dict) finally: try_rm_tcs_files() diff --git a/test/test_playlists.py b/test/test_playlists.py index 4c9c34057..5fb679aa1 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -9,8 +9,10 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL - +from test.helper import ( + expect_info_dict, + FakeYDL, +) from youtube_dl.extractor import ( AcademicEarthCourseIE, @@ -38,6 +40,9 @@ from youtube_dl.extractor import ( GenericIE, TEDIE, ToypicsUserIE, + XTubeUserIE, + InstagramUserIE, + CSpanIE, ) @@ -278,5 +283,51 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], 'Mikey') self.assertTrue(len(result['entries']) >= 17) + def test_xtube_user(self): + dl = FakeYDL() + ie = XTubeUserIE(dl) + result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'greenshowers') + self.assertTrue(len(result['entries']) >= 
155) + + def test_InstagramUser(self): + dl = FakeYDL() + ie = InstagramUserIE(dl) + result = ie.extract('http://instagram.com/porsche') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'porsche') + self.assertTrue(len(result['entries']) >= 2) + test_video = next( + e for e in result['entries'] + if e['id'] == '614605558512799803_462752227') + dl.add_default_extra_info(test_video, ie, '(irrelevant URL)') + dl.process_video_result(test_video, download=False) + EXPECTED = { + 'id': '614605558512799803_462752227', + 'ext': 'mp4', + 'title': '#Porsche Intelligent Performance.', + 'thumbnail': 're:^https?://.*\.jpg', + 'uploader': 'Porsche', + 'uploader_id': 'porsche', + 'timestamp': 1387486713, + 'upload_date': '20131219', + } + expect_info_dict(self, EXPECTED, test_video) + + def test_CSpan_playlist(self): + dl = FakeYDL() + ie = CSpanIE(dl) + result = ie.extract( + 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '342759') + self.assertEqual( + result['title'], 'General Motors Ignition Switch Recall') + self.assertEqual(len(result['entries']), 9) + whole_duration = sum(e['duration'] for e in result['entries']) + self.assertEqual(whole_duration, 14855) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 7ee74e36c..2348c0415 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # Various small unit tests import io +import json import xml.etree.ElementTree #from youtube_dl.utils import htmlentity_transform @@ -35,6 +36,8 @@ from youtube_dl.utils import ( url_basename, urlencode_postdata, xpath_with_ns, + parse_iso8601, + strip_jsonp, ) if sys.version_info < (3, 0): @@ -266,5 +269,16 @@ class TestUtil(unittest.TestCase): data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) 
self.assertTrue(isinstance(data, bytes)) + def test_parse_iso8601(self): + self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) + self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) + self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266) + + def test_strip_jsonp(self): + stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);') + d = json.loads(stripped) + self.assertEqual(d, [{"id": "532cb", "x": 3}]) + + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c5d08b0bb..5794fdbe9 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -8,6 +8,7 @@ import datetime import errno import io import json +import locale import os import platform import re @@ -94,6 +95,7 @@ class YoutubeDL(object): usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. + no_warnings: Do not print out anything for warnings. forceurl: Force printing final URL. forcetitle: Force printing title. forceid: Force printing ID. @@ -158,6 +160,7 @@ class YoutubeDL(object): include_ads: Download ads as well default_search: Prepend this string if an input url is not valid. 'auto' for elaborate guessing + encoding: Use this encoding instead of the system-specified. 
The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -376,6 +379,8 @@ class YoutubeDL(object): if self.params.get('logger') is not None: self.params['logger'].warning(message) else: + if self.params.get('no_warnings'): + return if self._err_file.isatty() and os.name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: @@ -512,13 +517,7 @@ class YoutubeDL(object): '_type': 'compat_list', 'entries': ie_result, } - self.add_extra_info(ie_result, - { - 'extractor': ie.IE_NAME, - 'webpage_url': url, - 'webpage_url_basename': url_basename(url), - 'extractor_key': ie.ie_key(), - }) + self.add_default_extra_info(ie_result, ie, url) if process: return self.process_ie_result(ie_result, download, extra_info) else: @@ -537,6 +536,14 @@ class YoutubeDL(object): else: self.report_error('no suitable InfoExtractor for URL %s' % url) + def add_default_extra_info(self, ie_result, ie, url): + self.add_extra_info(ie_result, { + 'extractor': ie.IE_NAME, + 'webpage_url': url, + 'webpage_url_basename': url_basename(url), + 'extractor_key': ie.ie_key(), + }) + def process_ie_result(self, ie_result, download=True, extra_info={}): """ Take the result of the ie(may be modified) and resolve all unresolved @@ -695,6 +702,11 @@ class YoutubeDL(object): def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' + if 'id' not in info_dict: + raise ExtractorError('Missing "id" field in extractor result') + if 'title' not in info_dict: + raise ExtractorError('Missing "title" field in extractor result') + if 'playlist' not in info_dict: # It isn't part of a playlist info_dict['playlist'] = None @@ -726,6 +738,9 @@ class YoutubeDL(object): # We check that all the formats have the format and format_id fields for i, format in enumerate(formats): + if 'url' not in format: + raise ExtractorError('Missing "url" key in result (index %d)' % i) + if format.get('format_id') is None: format['format_id'] = 
compat_str(i) if format.get('format') is None: @@ -736,7 +751,7 @@ class YoutubeDL(object): ) # Automatically determine file extension if missing if 'ext' not in format: - format['ext'] = determine_ext(format['url']) + format['ext'] = determine_ext(format['url']).lower() format_limit = self.params.get('format_limit', None) if format_limit: @@ -861,7 +876,7 @@ class YoutubeDL(object): try: dn = os.path.dirname(encodeFilename(filename)) - if dn != '' and not os.path.exists(dn): + if dn and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: self.report_error('unable to create directory ' + compat_str(err)) @@ -1195,6 +1210,9 @@ class YoutubeDL(object): def print_debug_header(self): if not self.params.get('verbose'): return + + write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % + (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding())) write_string('[debug] youtube-dl version ' + __version__ + '\n') try: sp = subprocess.Popen( @@ -1259,3 +1277,19 @@ class YoutubeDL(object): # (See https://github.com/rg3/youtube-dl/issues/1309 for details) opener.addheaders = [] self._opener = opener + + def encode(self, s): + if isinstance(s, bytes): + return s # Already encoded + + try: + return s.encode(self.get_encoding()) + except UnicodeEncodeError as err: + err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.' + raise + + def get_encoding(self): + encoding = self.params.get('encoding') + if encoding is None: + encoding = preferredencoding() + return encoding diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a4cbdb0bd..aba8b4537 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -51,6 +51,7 @@ __authors__ = ( 'David Wagner', 'Juan C. 
Olivares', 'Mattias Harrysson', + 'phaer', ) __license__ = 'Public Domain' @@ -227,6 +228,9 @@ def parseOpts(overrideArguments=None): general.add_option('--referer', dest='referer', help='specify a custom referer, use if the video access is restricted to one domain', metavar='REF', default=None) + general.add_option('--add-header', + dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append", + metavar='FIELD:VALUE') general.add_option('--list-extractors', action='store_true', dest='list_extractors', help='List all supported extractors and the URLs they would handle', default=False) @@ -238,7 +242,7 @@ def parseOpts(overrideArguments=None): help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option( - '--prefer-insecure', action='store_true', dest='prefer_insecure', + '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)') general.add_option( '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', @@ -252,13 +256,17 @@ def parseOpts(overrideArguments=None): general.add_option( '--bidi-workaround', dest='bidi_workaround', action='store_true', help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') - general.add_option('--default-search', - dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". 
By default (with value "auto") youtube-dl guesses.') + general.add_option( + '--default-search', + dest='default_search', metavar='PREFIX', + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.') general.add_option( '--ignore-config', action='store_true', help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') + general.add_option( + '--encoding', dest='encoding', metavar='ENCODING', + help='Force the specified encoding (experimental)') selection.add_option( '--playlist-start', @@ -361,6 +369,10 @@ def parseOpts(overrideArguments=None): verbosity.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode', default=False) + verbosity.add_option( + '--no-warnings', + dest='no_warnings', action='store_true', default=False, + help='Ignore warnings') verbosity.add_option('-s', '--simulate', action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False) verbosity.add_option('--skip-download', @@ -388,7 +400,7 @@ def parseOpts(overrideArguments=None): help='simulate, quiet but print output format', default=False) verbosity.add_option('-j', '--dump-json', action='store_true', dest='dumpjson', - help='simulate, quiet but print JSON information', default=False) + help='simulate, quiet but print JSON information. 
See --output for a description of available keys.', default=False) verbosity.add_option('--newline', action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False) verbosity.add_option('--no-progress', @@ -532,8 +544,6 @@ def parseOpts(overrideArguments=None): write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') - write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' % - (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding())) return parser, opts, args @@ -556,6 +566,16 @@ def _real_main(argv=None): if opts.referer is not None: std_headers['Referer'] = opts.referer + # Custom HTTP headers + if opts.headers is not None: + for h in opts.headers: + if h.find(':', 1) < 0: + parser.error(u'wrong header formatting, it should be key:value, not "%s"'%h) + key, value = h.split(':', 2) + if opts.verbose: + write_string(u'[debug] Adding header from command line option %s:%s\n'%(key, value)) + std_headers[key] = value + # Dump user agent if opts.dump_user_agent: compat_print(std_headers['User-Agent']) @@ -657,7 +677,7 @@ def _real_main(argv=None): date = DateRange.day(opts.date) else: date = DateRange(opts.dateafter, opts.datebefore) - if opts.default_search not in ('auto', None) and ':' not in opts.default_search: + if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search: parser.error(u'--default-search invalid; did you forget a colon (:) at the end?') # Do not download videos when there are audio-only formats @@ -695,6 +715,7 @@ def _real_main(argv=None): 'password': opts.password, 'videopassword': opts.videopassword, 'quiet': (opts.quiet or any_printing), + 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, 
'forcetitle': opts.gettitle, 'forceid': opts.getid, @@ -767,6 +788,7 @@ def _real_main(argv=None): 'include_ads': opts.include_ads, 'default_search': opts.default_search, 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest, + 'encoding': opts.encoding, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 5a068aa8b..917f3450e 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -4,9 +4,10 @@ import sys import time from ..utils import ( + compat_str, encodeFilename, - timeconvert, format_bytes, + timeconvert, ) @@ -173,7 +174,7 @@ class FileDownloader(object): return os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) except (IOError, OSError) as err: - self.report_error(u'unable to rename file: %s' % str(err)) + self.report_error(u'unable to rename file: %s' % compat_str(err)) def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 4e6abfe10..e6be6ae6c 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -297,6 +297,7 @@ class F4mFD(FileDownloader): break frags_filenames.append(frag_filename) + dest_stream.close() self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) self.try_rename(tmpfilename, filename) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index fa983462b..9d407fe6e 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -13,8 +13,10 @@ class HlsFD(FileDownloader): self.report_destination(filename) tmpfilename = self.temp_name(filename) - args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy', - '-bsf:a', 'aac_adtstoasc', tmpfilename] + args = [ + '-y', '-i', url, '-f', 'mp4', '-c', 'copy', + '-bsf:a', 'aac_adtstoasc', + encodeFilename(tmpfilename, for_subprocess=True)] for program in ['avconv', 
'ffmpeg']: try: diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 348097dab..cc8b9c9a7 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -23,6 +23,8 @@ class HttpFD(FileDownloader): headers = {'Youtubedl-no-compression': 'True'} if 'user_agent' in info_dict: headers['Youtubedl-user-agent'] = info_dict['user_agent'] + if 'http_referer' in info_dict: + headers['Referer'] = info_dict['http_referer'] basic_request = compat_urllib_request.Request(url, None, headers) request = compat_urllib_request.Request(url, None, headers) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b8c843515..66f71edf6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -14,6 +14,7 @@ from .arte import ( ArteTVConcertIE, ArteTVFutureIE, ArteTVDDCIE, + ArteTVEmbedIE, ) from .auengine import AUEngineIE from .bambuser import BambuserIE, BambuserChannelIE @@ -25,6 +26,7 @@ from .bloomberg import BloombergIE from .br import BRIE from .breakcom import BreakIE from .brightcove import BrightcoveIE +from .byutv import BYUtvIE from .c56 import C56IE from .canal13cl import Canal13clIE from .canalplus import CanalplusIE @@ -38,6 +40,7 @@ from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .clipsyndicate import ClipsyndicateIE from .cmt import CMTIE +from .cnet import CNETIE from .cnn import ( CNNIE, CNNBlogsIE, @@ -81,6 +84,7 @@ from .fktv import ( ) from .flickr import FlickrIE from .fourtube import FourTubeIE +from .franceculture import FranceCultureIE from .franceinter import FranceInterIE from .francetv import ( PluzzIE, @@ -112,7 +116,7 @@ from .imdb import ( ) from .ina import InaIE from .infoq import InfoQIE -from .instagram import InstagramIE +from .instagram import InstagramIE, InstagramUserIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import IPrimaIE from .ivi import ( @@ -150,10 +154,13 @@ from .mixcloud import 
MixcloudIE from .mpora import MporaIE from .mofosex import MofosexIE from .mooshare import MooshareIE +from .morningstar import MorningstarIE +from .motorsport import MotorsportIE from .mtv import ( MTVIE, MTVIggyIE, ) +from .musicplayon import MusicPlayOnIE from .muzu import MuzuTVIE from .myspace import MySpaceIE from .myspass import MySpassIE @@ -175,6 +182,8 @@ from .normalboots import NormalbootsIE from .novamov import NovaMovIE from .nowness import NownessIE from .nowvideo import NowVideoIE +from .ntv import NTVIE +from .oe1 import OE1IE from .ooyala import OoyalaIE from .orf import ORFIE from .parliamentliveuk import ParliamentLiveUKIE @@ -195,6 +204,7 @@ from .ro220 import Ro220IE from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtlnow import RTLnowIE +from .rts import RTSIE from .rutube import ( RutubeIE, RutubeChannelIE, @@ -205,7 +215,6 @@ from .rutv import RUTVIE from .savefrom import SaveFromIE from .servingsys import ServingSysIE from .sina import SinaIE -from .slashdot import SlashdotIE from .slideshare import SlideshareIE from .smotri import ( SmotriIE, @@ -254,13 +263,13 @@ from .udemy import ( UdemyCourseIE ) from .unistra import UnistraIE +from .urort import UrortIE from .ustream import UstreamIE, UstreamChannelIE from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veoh import VeohIE from .vesti import VestiIE from .vevo import VevoIE -from .vice import ViceIE from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE @@ -279,16 +288,21 @@ from .vine import VineIE from .viki import VikiIE from .vk import VKIE from .vube import VubeIE +from .washingtonpost import WashingtonPostIE from .wat import WatIE -from .wdr import WDRIE +from .wdr import ( + WDRIE, + WDRMausIE, +) from .weibo import WeiboIE from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE +from .xbef import XBefIE from .xhamster import XHamsterIE from 
.xnxx import XNXXIE from .xvideos import XVideosIE -from .xtube import XTubeIE +from .xtube import XTubeUserIE, XTubeIE from .yahoo import ( YahooIE, YahooNewsIE, diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index a3a1b999d..fcf296057 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -14,14 +16,14 @@ from ..utils import ( class AddAnimeIE(InfoExtractor): _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P[\w_]+)(?:.*)' - IE_NAME = u'AddAnime' _TEST = { - u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', - u'file': u'24MR3YO5SAS9.mp4', - u'md5': u'72954ea10bc979ab5e2eb288b21425a0', - u'info_dict': { - u"description": u"One Piece 606", - u"title": u"One Piece 606" + 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', + 'md5': '72954ea10bc979ab5e2eb288b21425a0', + 'info_dict': { + 'id': '24MR3YO5SAS9', + 'ext': 'mp4', + 'description': 'One Piece 606', + 'title': 'One Piece 606', } } @@ -38,10 +40,10 @@ class AddAnimeIE(InfoExtractor): redir_webpage = ee.cause.read().decode('utf-8') action = self._search_regex( r'
', - redir_webpage, u'redirect vc value') + redir_webpage, 'redirect vc value') av = re.search( r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);', redir_webpage) @@ -52,19 +54,19 @@ class AddAnimeIE(InfoExtractor): parsed_url = compat_urllib_parse_urlparse(url) av_val = av_res + len(parsed_url.netloc) confirm_url = ( - parsed_url.scheme + u'://' + parsed_url.netloc + + parsed_url.scheme + '://' + parsed_url.netloc + action + '?' + compat_urllib_parse.urlencode({ 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)})) self._download_webpage( confirm_url, video_id, - note=u'Confirming after redirect') + note='Confirming after redirect') webpage = self._download_webpage(url, video_id) formats = [] for format_id in ('normal', 'hq'): rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) - video_url = self._search_regex(rex, webpage, u'video file URLx', + video_url = self._search_regex(rex, webpage, 'video file URLx', fatal=False) if not video_url: continue @@ -72,14 +74,13 @@ class AddAnimeIE(InfoExtractor): 'format_id': format_id, 'url': video_url, }) - if not formats: - raise ExtractorError(u'Cannot find any video format!') + self._sort_formats(formats) video_title = self._og_search_title(webpage) video_description = self._og_search_description(webpage) return { '_type': 'video', - 'id': video_id, + 'id': video_id, 'formats': formats, 'title': video_title, 'description': video_description diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 922cede05..dc8657b67 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -6,7 +6,6 @@ import json from .common import InfoExtractor from ..utils import ( compat_urlparse, - determine_ext, ) @@ -16,9 +15,10 @@ class AppleTrailersIE(InfoExtractor): "url": "http://trailers.apple.com/trailers/wb/manofsteel/", "playlist": [ { - "file": "manofsteel-trailer4.mov", "md5": "d97a8e575432dbcb81b7c3acb741f8a8", "info_dict": { + "id": 
"manofsteel-trailer4", + "ext": "mov", "duration": 111, "title": "Trailer 4", "upload_date": "20130523", @@ -26,9 +26,10 @@ class AppleTrailersIE(InfoExtractor): }, }, { - "file": "manofsteel-trailer3.mov", "md5": "b8017b7131b721fb4e8d6f49e1df908c", "info_dict": { + "id": "manofsteel-trailer3", + "ext": "mov", "duration": 182, "title": "Trailer 3", "upload_date": "20130417", @@ -36,9 +37,10 @@ class AppleTrailersIE(InfoExtractor): }, }, { - "file": "manofsteel-trailer.mov", "md5": "d0f1e1150989b9924679b441f3404d48", "info_dict": { + "id": "manofsteel-trailer", + "ext": "mov", "duration": 148, "title": "Trailer", "upload_date": "20121212", @@ -46,15 +48,16 @@ class AppleTrailersIE(InfoExtractor): }, }, { - "file": "manofsteel-teaser.mov", "md5": "5fe08795b943eb2e757fa95cb6def1cb", "info_dict": { + "id": "manofsteel-teaser", + "ext": "mov", "duration": 93, "title": "Teaser", "upload_date": "20120721", "uploader_id": "wb", }, - } + }, ] } @@ -65,16 +68,16 @@ class AppleTrailersIE(InfoExtractor): movie = mobj.group('movie') uploader_id = mobj.group('company') - playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc') + playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') def fix_html(s): - s = re.sub(r'(?s).*?', u'', s) + s = re.sub(r'(?s).*?', '', s) s = re.sub(r'', r'', s) # The ' in the onClick attributes are not escaped, it couldn't be parsed # like: http://trailers.apple.com/trailers/wb/gravity/ def _clean_json(m): - return u'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') + return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') s = re.sub(self._JSON_RE, _clean_json, s) - s = u'' + s + u'' + s = '' + s + u'' return s doc = self._download_xml(playlist_url, movie, transform_source=fix_html) @@ -82,7 +85,7 @@ class AppleTrailersIE(InfoExtractor): for li in doc.findall('./div/ul/li'): on_click = li.find('.//a').attrib['onClick'] trailer_info_json = self._search_regex(self._JSON_RE, - on_click, u'trailer info') + 
on_click, 'trailer info') trailer_info = json.loads(trailer_info_json) title = trailer_info['title'] video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() @@ -98,8 +101,7 @@ class AppleTrailersIE(InfoExtractor): first_url = trailer_info['url'] trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) - settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json') - settings = json.loads(settings_json) + settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') formats = [] for format in settings['metadata']['sizes']: @@ -107,7 +109,6 @@ class AppleTrailersIE(InfoExtractor): format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src']) formats.append({ 'url': format_url, - 'ext': determine_ext(format_url), 'format': format['type'], 'width': format['width'], 'height': int(format['height']), diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 979481b21..646377e4b 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( @@ -19,114 +18,41 @@ from ..utils import ( # is different for each one. The videos usually expire in 7 days, so we can't # add tests. 
-class ArteTvIE(InfoExtractor): - _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?Pfr|de)/.*-(?P.*?)\.html' - _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?Pfr|de)/(?P.+?)/(?P.+)' - _LIVE_URL = r'index-[0-9]+\.html$' +class ArteTvIE(InfoExtractor): + _VALID_URL = r'http://videos\.arte\.tv/(?Pfr|de)/.*-(?P.*?)\.html' IE_NAME = 'arte.tv' - @classmethod - def suitable(cls, url): - return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL)) - - # TODO implement Live Stream - # from ..utils import compat_urllib_parse - # def extractLiveStream(self, url): - # video_lang = url.split('/')[-4] - # info = self.grep_webpage( - # url, - # r'src="(.*?/videothek_js.*?\.js)', - # 0, - # [ - # (1, 'url', 'Invalid URL: %s' % url) - # ] - # ) - # http_host = url.split('/')[2] - # next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url'))) - # info = self.grep_webpage( - # next_url, - # r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' + - # '(http://.*?\.swf).*?' 
+ - # '(rtmp://.*?)\'', - # re.DOTALL, - # [ - # (1, 'path', 'could not extract video path: %s' % url), - # (2, 'player', 'could not extract video player: %s' % url), - # (3, 'url', 'could not extract video url: %s' % url) - # ] - # ) - # video_url = '%s/%s' % (info.get('url'), info.get('path')) - def _real_extract(self, url): - mobj = re.match(self._VIDEOS_URL, url) - if mobj is not None: - id = mobj.group('id') - lang = mobj.group('lang') - return self._extract_video(url, id, lang) - - mobj = re.match(self._LIVEWEB_URL, url) - if mobj is not None: - name = mobj.group('name') - lang = mobj.group('lang') - return self._extract_liveweb(url, name, lang) - - if re.search(self._LIVE_URL, url) is not None: - raise ExtractorError('Arte live streams are not yet supported, sorry') - # self.extractLiveStream(url) - # return - - raise ExtractorError('No video found') - - def _extract_video(self, url, video_id, lang): - """Extract from videos.arte.tv""" + mobj = re.match(self._VALID_URL, url) + lang = mobj.group('lang') + video_id = mobj.group('id') + ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') ref_xml_doc = self._download_xml( ref_xml_url, video_id, note='Downloading metadata') config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) config_xml_url = config_node.attrib['ref'] - config_xml = self._download_webpage( + config = self._download_xml( config_xml_url, video_id, note='Downloading configuration') - video_urls = list(re.finditer(r'(?P.*?)', config_xml)) - def _key(m): - quality = m.group('quality') - if quality == 'hd': - return 2 - else: - return 1 - # We pick the best quality - video_urls = sorted(video_urls, key=_key) - video_url = list(video_urls)[-1].group('url') - - title = self._html_search_regex(r'(.*?)', config_xml, 'title') - thumbnail = self._html_search_regex(r'(.*?)', - config_xml, 'thumbnail') - return {'id': video_id, - 'title': title, - 'thumbnail': 
thumbnail, - 'url': video_url, - 'ext': 'flv', - } - - def _extract_liveweb(self, url, name, lang): - """Extract form http://liveweb.arte.tv/""" - webpage = self._download_webpage(url, name) - video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id') - config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, - video_id, 'Downloading information') - event_doc = config_doc.find('event') - url_node = event_doc.find('video').find('urlHd') - if url_node is None: - url_node = event_doc.find('urlSd') - - return {'id': video_id, - 'title': event_doc.find('name%s' % lang.capitalize()).text, - 'url': url_node.text.replace('MP4', 'mp4'), - 'ext': 'flv', - 'thumbnail': self._og_search_thumbnail(webpage), - } + formats = [{ + 'forma_id': q.attrib['quality'], + 'url': q.text, + 'ext': 'flv', + 'quality': 2 if q.attrib['quality'] == 'hd' else 1, + } for q in config.findall('./urls/url')] + self._sort_formats(formats) + + title = config.find('.//name').text + thumbnail = config.find('.//firstThumbnailUrl').text + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } class ArteTVPlus7IE(InfoExtractor): @@ -152,9 +78,7 @@ class ArteTVPlus7IE(InfoExtractor): return self._extract_from_json_url(json_url, video_id, lang) def _extract_from_json_url(self, json_url, video_id, lang): - json_info = self._download_webpage(json_url, video_id, 'Downloading info json') - self.report_extraction(video_id) - info = json.loads(json_info) + info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] info_dict = { @@ -176,6 +100,8 @@ class ArteTVPlus7IE(InfoExtractor): l = 'F' elif lang == 'de': l = 'A' + else: + l = lang regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] return any(re.match(r, f['versionCode']) for r in regexes) # Some formats may not be in the same language as the url @@ -305,3 +231,22 @@ class ArteTVConcertIE(ArteTVPlus7IE): 'description': 
'md5:486eb08f991552ade77439fe6d82c305', }, } + + +class ArteTVEmbedIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:embed' + _VALID_URL = r'''(?x) + http://www\.arte\.tv + /playerv2/embed\.php\?json_url= + (?P + http://arte\.tv/papi/tvguide/videos/stream/player/ + (?P[^/]+)/(?P[^/]+)[^&]* + ) + ''' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + lang = mobj.group('lang') + json_url = mobj.group('json_url') + return self._extract_from_json_url(json_url, video_id, lang) diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index c6f30e626..20bf12550 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -11,22 +11,24 @@ from ..utils import ( class AUEngineIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P[^&]+).*?' + _TEST = { 'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', - 'file': 'lfvlytY6.mp4', 'md5': '48972bdbcf1a3a2f5533e62425b41d4f', 'info_dict': { + 'id': 'lfvlytY6', + 'ext': 'mp4', 'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]' } } - _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'(?P<title>.+?)', - webpage, 'title') + title = self._html_search_regex(r'(?P<title>.+?)', webpage, 'title') title = title.strip() links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) links = map(compat_urllib_parse.unquote, links) @@ -39,14 +41,15 @@ class AUEngineIE(InfoExtractor): elif '/videos/' in link: video_url = link if not video_url: - raise ExtractorError(u'Could not find video URL') + raise ExtractorError('Could not find video URL') ext = '.' 
+ determine_ext(video_url) if ext == title[-len(ext):]: title = title[:-len(ext)] return { - 'id': video_id, - 'url': video_url, - 'title': title, + 'id': video_id, + 'url': video_url, + 'title': title, 'thumbnail': thumbnail, + 'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf', } diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 2415ce403..25fb79e14 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -1,22 +1,21 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor -from .ooyala import OoyalaIE class BloombergIE(InfoExtractor): _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P.+?)\.html' _TEST = { - u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', - u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4', - u'info_dict': { - u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies', - u'description': u'md5:abc86e5236f9f0e4866c59ad36736686', - }, - u'params': { - # Requires ffmpeg (m3u8 manifest) - u'skip_download': True, + 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', + 'md5': '7bf08858ff7c203c870e8a6190e221e5', + 'info_dict': { + 'id': 'qurhIVlJSB6hzkVi229d8g', + 'ext': 'flv', + 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', + 'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88', }, } @@ -24,7 +23,16 @@ class BloombergIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') webpage = self._download_webpage(url, name) - embed_code = self._search_regex( - r'[^/?#]+)' + _TEST = { + 'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking', + 'info_dict': { + 'id': 'granite-flats-talking', + 'ext': 'mp4', + 'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f', + 'title': 'Talking', + 
'thumbnail': 're:^https?://.*promo.*' + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + + webpage = self._download_webpage(url, video_id) + episode_code = self._search_regex( + r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') + episode_json = re.sub( + r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code) + ep = json.loads(episode_json) + + if ep['providerType'] == 'Ooyala': + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % ep['providerId'], + 'id': video_id, + 'title': ep['title'], + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + } + else: + raise ExtractorError('Unsupported provider %s' % ep['provider']) diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py index 690bc7c25..cb96c3876 100644 --- a/youtube_dl/extractor/c56.py +++ b/youtube_dl/extractor/c56.py @@ -2,39 +2,46 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor class C56IE(InfoExtractor): - _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P.+?)\.(html|swf)' + _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P.+?)\.(?:html|swf)' IE_NAME = '56.com' _TEST = { 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html', - 'file': '93440716.flv', 'md5': 'e59995ac63d0457783ea05f93f12a866', 'info_dict': { + 'id': '93440716', + 'ext': 'flv', 'title': '网事知多少 第32期:车怒', + 'duration': 283.813, }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) text_id = mobj.group('textid') - info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id, - text_id, 'Downloading video info') - info = json.loads(info_page)['info'] - formats = [{ - 'format_id': f['type'], - 'filesize': int(f['filesize']), - 'url': f['url'] - } for f in info['rfiles']] + 
+ page = self._download_json( + 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info') + + info = page['info'] + + formats = [ + { + 'format_id': f['type'], + 'filesize': int(f['filesize']), + 'url': f['url'] + } for f in info['rfiles'] + ] self._sort_formats(formats) return { 'id': info['vid'], 'title': info['Subject'], + 'duration': int(info['duration']) / 1000.0, 'formats': formats, 'thumbnail': info.get('bimg') or info.get('img'), } diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 7cdcd8399..49dfd881e 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -28,7 +28,7 @@ class CanalplusIE(InfoExtractor): video_id = mobj.groupdict().get('id') if video_id is None: webpage = self._download_webpage(url, mobj.group('path')) - video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') + video_id = self._search_regex(r'[0-9]{4})/(?P[0-9]{2})/(?P[0-9]{2})/.+?' + _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P[0-9]{4})/(?P[0-9]{2})/(?P[0-9]{2})/(?P[^?#/]+)' _TESTS = [ { 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', 'file': '19911.mp4', - 'md5': 'fde81fbafaee331785f58cd6c0d46190', + 'md5': '782f8504ca95a0eba8fc9177c373eec7', 'info_dict': { 'upload_date': '20121110', 'title': '“Angry Video Game Nerd: The Movie” – Trailer', @@ -24,7 +24,7 @@ class CinemassacreIE(InfoExtractor): { 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', 'file': '521be8ef82b16.mp4', - 'md5': 'd72f10cd39eac4215048f62ab477a511', + 'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35', 'info_dict': { 'upload_date': '20131002', 'title': 'The Mummy’s Hand (1940)', @@ -34,8 +34,9 @@ class CinemassacreIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') - webpage = self._download_webpage(url, None) # Don't know video id yet + webpage = self._download_webpage(url, display_id) video_date = 
mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') mobj = re.search(r'src="(?Phttp://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P.+?))"', webpage) if not mobj: @@ -43,33 +44,36 @@ class CinemassacreIE(InfoExtractor): playerdata_url = mobj.group('embed_url') video_id = mobj.group('video_id') - video_title = self._html_search_regex(r'(?P<title>.+?)\|', - webpage, 'title') - video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>', + video_title = self._html_search_regex( + r'<title>(?P<title>.+?)\|', webpage, 'title') + video_description = self._html_search_regex( + r'<div class="entry-content">(?P<description>.+?)</div>', webpage, 'description', flags=re.DOTALL, fatal=False) - if len(video_description) == 0: - video_description = None playerdata = self._download_webpage(playerdata_url, video_id) - sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file') - hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file') + sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file') + hd_url = self._html_search_regex( + r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file', + default=None) video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False) - formats = [ - { - 'url': sd_url, - 'ext': 'mp4', - 'format': 'sd', - 'format_id': 'sd', - }, - { + formats = [{ + 'url': sd_url, + 'ext': 'mp4', + 'format': 'sd', + 'format_id': 'sd', + 'quality': 1, + }] + if hd_url: + formats.append({ 'url': hd_url, 'ext': 'mp4', 'format': 'hd', 'format_id': 'hd', - }, - ] + 'quality': 2, + }) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index 43efb08bf..669919a2c 100644 --- a/youtube_dl/extractor/clipfish.py +++ 
b/youtube_dl/extractor/clipfish.py @@ -1,22 +1,28 @@ +from __future__ import unicode_literals + import re import time import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + parse_duration, +) class ClipfishIE(InfoExtractor): - IE_NAME = u'clipfish' + IE_NAME = 'clipfish' _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/' _TEST = { - u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/', - u'file': u'3966754.mp4', - u'md5': u'2521cd644e862936cf2e698206e47385', - u'info_dict': { - u'title': u'FIFA 14 - E3 2013 Trailer', - u'duration': 82, + 'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/', + 'md5': '2521cd644e862936cf2e698206e47385', + 'info_dict': { + 'id': '3966754', + 'ext': 'mp4', + 'title': 'FIFA 14 - E3 2013 Trailer', + 'duration': 82, }, u'skip': 'Blocked in the US' } @@ -33,21 +39,10 @@ class ClipfishIE(InfoExtractor): video_url = doc.find('filename').text if video_url is None: xml_bytes = xml.etree.ElementTree.tostring(doc) - raise ExtractorError(u'Cannot find video URL in document %r' % + raise ExtractorError('Cannot find video URL in document %r' % xml_bytes) thumbnail = doc.find('imageurl').text - duration_str = doc.find('duration').text - m = re.match( - r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$', - duration_str) - if m: - duration = ( - (int(m.group('hours')) * 60 * 60) + - (int(m.group('minutes')) * 60) + - (int(m.group('seconds'))) - ) - else: - duration = None + duration = parse_duration(doc.find('duration').text) return { 'id': video_id, diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py index 9ab6a4ab6..02a1667fa 100644 --- a/youtube_dl/extractor/clipsyndicate.py +++ b/youtube_dl/extractor/clipsyndicate.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from 
.common import InfoExtractor @@ -11,13 +13,14 @@ class ClipsyndicateIE(InfoExtractor): _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)' _TEST = { - u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe', - u'md5': u'4d7d549451bad625e0ff3d7bd56d776c', - u'info_dict': { - u'id': u'4629301', - u'ext': u'mp4', - u'title': u'Brick Briscoe', - u'duration': 612, + 'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe', + 'md5': '4d7d549451bad625e0ff3d7bd56d776c', + 'info_dict': { + 'id': '4629301', + 'ext': 'mp4', + 'title': 'Brick Briscoe', + 'duration': 612, + 'thumbnail': 're:^https?://.+\.jpg', }, } @@ -26,13 +29,13 @@ class ClipsyndicateIE(InfoExtractor): video_id = mobj.group('id') js_player = self._download_webpage( 'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id, - video_id, u'Downlaoding player') + video_id, 'Downlaoding player') # it includes a required token - flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars') + flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars') pdoc = self._download_xml( 'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars, - video_id, u'Downloading video info', + video_id, 'Downloading video info', transform_source=fix_xml_ampersands) track_doc = pdoc.find('trackList/track') diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py new file mode 100644 index 000000000..f5ab443d2 --- /dev/null +++ b/youtube_dl/extractor/cnet.py @@ -0,0 +1,75 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, +) + + +class CNETIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/' + _TEST = { + 'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/', + 'md5': '041233212a0d06b179c87cbcca1577b8', + 'info_dict': { + 'id': 
'56f4ea68-bd21-4852-b08c-4de5b8354c60', + 'ext': 'mp4', + 'title': 'Hands-on with Microsoft Windows 8.1 Update', + 'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.', + 'thumbnail': 're:^http://.*/flmswindows8.jpg$', + 'uploader_id': 'sarah.mitroff@cbsinteractive.com', + 'uploader': 'Sarah Mitroff', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + data_json = self._html_search_regex( + r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'", + webpage, 'data json') + data = json.loads(data_json) + vdata = data['video'] + if not vdata: + vdata = data['videos'][0] + if not vdata: + raise ExtractorError('Cannot find video data') + + video_id = vdata['id'] + title = vdata['headline'] + description = vdata.get('dek') + thumbnail = vdata.get('image', {}).get('path') + author = vdata.get('author') + if author: + uploader = '%s %s' % (author['firstName'], author['lastName']) + uploader_id = author.get('email') + else: + uploader = None + uploader_id = None + + formats = [{ + 'format_id': '%s-%s-%s' % ( + f['type'], f['format'], + int_or_none(f.get('bitrate'), 1000, default='')), + 'url': f['uri'], + 'tbr': int_or_none(f.get('bitrate'), 1000), + } for f in vdata['files']['data']] + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index d50fcdbdb..0c99887a2 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -7,8 +7,8 @@ from .mtv import MTVServicesInfoExtractor from ..utils import ( compat_str, compat_urllib_parse, - ExtractorError, + float_or_none, 
unified_strdate, ) @@ -32,31 +32,34 @@ class ComedyCentralIE(MTVServicesInfoExtractor): class ComedyCentralShowsIE(InfoExtractor): - IE_DESC = 'The Daily Show / Colbert Report' + IE_DESC = 'The Daily Show / The Colbert Report' # urls can be abbreviations like :thedailyshow or :colbert # urls for episodes like: # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day # or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524 - _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport) - |(https?://)?(www\.)? - (?P<showname>thedailyshow|colbertnation)\.com/ - (full-episodes/(?P<episode>.*)| + _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport) + |https?://(:www\.)? + (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ + (full-episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| (?P<clip> - (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) - |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))| + (?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+)) + |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) + |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) + )| (?P<interview> - extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?))) - $""" + extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?))) + (?:[?#].*|$)''' _TEST = { - 'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart', - 'file': '422212.mp4', + 'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart', 'md5': '4e2f5cb088a83cd8cdb7756132f9739d', 'info_dict': { - "upload_date": "20121214", - "description": "Kristen Stewart", - "uploader": "thedailyshow", - "title": 
"thedailyshow-kristen-stewart part 1" + 'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55', + 'ext': 'mp4', + 'upload_date': '20121213', + 'description': 'Kristen Stewart learns to let loose in "On the Road."', + 'uploader': 'thedailyshow', + 'title': 'thedailyshow kristen-stewart part 1', } } @@ -79,11 +82,6 @@ class ComedyCentralShowsIE(InfoExtractor): '400': (384, 216), } - @classmethod - def suitable(cls, url): - """Receives a URL and returns True if suitable for this IE.""" - return re.match(cls._VALID_URL, url, re.VERBOSE) is not None - @staticmethod def _transform_rtmp_url(rtmp_video_url): m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url) @@ -99,14 +97,16 @@ class ComedyCentralShowsIE(InfoExtractor): if mobj.group('shortname'): if mobj.group('shortname') in ('tds', 'thedailyshow'): - url = 'http://www.thedailyshow.com/full-episodes/' + url = 'http://thedailyshow.cc.com/full-episodes/' else: - url = 'http://www.colbertnation.com/full-episodes/' + url = 'http://thecolbertreport.cc.com/full-episodes/' mobj = re.match(self._VALID_URL, url, re.VERBOSE) assert mobj is not None if mobj.group('clip'): - if mobj.group('showname') == 'thedailyshow': + if mobj.group('videotitle'): + epTitle = mobj.group('videotitle') + elif mobj.group('showname') == 'thedailyshow': epTitle = mobj.group('tdstitle') else: epTitle = mobj.group('cntitle') @@ -120,9 +120,9 @@ class ComedyCentralShowsIE(InfoExtractor): epTitle = mobj.group('showname') else: epTitle = mobj.group('episode') + show_name = mobj.group('showname') - self.report_extraction(epTitle) - webpage,htmlHandle = self._download_webpage_handle(url, epTitle) + webpage, htmlHandle = self._download_webpage_handle(url, epTitle) if dlNewest: url = htmlHandle.geturl() mobj = re.match(self._VALID_URL, url, re.VERBOSE) @@ -130,71 +130,86 @@ class ComedyCentralShowsIE(InfoExtractor): raise ExtractorError('Invalid redirected URL: ' + url) if mobj.group('episode') == '': raise ExtractorError('Redirected URL is 
still not specific: ' + url) - epTitle = mobj.group('episode') + epTitle = mobj.group('episode').rpartition('/')[-1] mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage) - if len(mMovieParams) == 0: # The Colbert Report embeds the information in a without # a URL prefix; so extract the alternate reference # and then add the URL prefix manually. - altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage) + altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage) if len(altMovieParams) == 0: raise ExtractorError('unable to find Flash URL in webpage ' + url) else: mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])] uri = mMovieParams[0][1] - indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri}) - idoc = self._download_xml(indexUrl, epTitle, - 'Downloading show index', - 'unable to download episode index') - - results = [] - - itemEls = idoc.findall('.//item') - for partNum,itemEl in enumerate(itemEls): - mediaId = itemEl.findall('./guid')[0].text - shortMediaId = mediaId.split(':')[-1] - showId = mediaId.split(':')[-2].replace('.com', '') - officialTitle = itemEl.findall('./title')[0].text - officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text) - - configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' 
+ - compat_urllib_parse.urlencode({'uri': mediaId})) - cdoc = self._download_xml(configUrl, epTitle, - 'Downloading configuration for %s' % shortMediaId) + # Correct cc.com in uri + uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri) + + index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri})) + idoc = self._download_xml( + index_url, epTitle, + 'Downloading show index', 'Unable to download episode index') + + title = idoc.find('./channel/title').text + description = idoc.find('./channel/description').text + + entries = [] + item_els = idoc.findall('.//item') + for part_num, itemEl in enumerate(item_els): + upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text) + thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url') + + content = itemEl.find('.//{http://search.yahoo.com/mrss/}content') + duration = float_or_none(content.attrib.get('duration')) + mediagen_url = content.attrib['url'] + guid = itemEl.find('./guid').text.rpartition(':')[-1] + + cdoc = self._download_xml( + mediagen_url, epTitle, + 'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els))) turls = [] for rendition in cdoc.findall('.//rendition'): finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) turls.append(finfo) - if len(turls) == 0: - self._downloader.report_error('unable to download ' + mediaId + ': No videos found') - continue - formats = [] for format, rtmp_video_url in turls: w, h = self._video_dimensions.get(format, (None, None)) formats.append({ + 'format_id': 'vhttp-%s' % format, 'url': self._transform_rtmp_url(rtmp_video_url), 'ext': self._video_extensions.get(format, 'mp4'), - 'format_id': format, 'height': h, 'width': w, }) + formats.append({ + 'format_id': 'rtmp-%s' % format, + 'url': rtmp_video_url, + 'ext': self._video_extensions.get(format, 'mp4'), + 'height': h, + 'width': w, + }) + self._sort_formats(formats) - effTitle = showId + '-' 
+ epTitle + ' part ' + compat_str(partNum+1) - results.append({ - 'id': shortMediaId, + virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1) + entries.append({ + 'id': guid, + 'title': virtual_id, 'formats': formats, - 'uploader': showId, - 'upload_date': officialDate, - 'title': effTitle, - 'thumbnail': None, - 'description': compat_str(officialTitle), + 'uploader': show_name, + 'upload_date': upload_date, + 'duration': duration, + 'thumbnail': thumbnail, + 'description': description, }) - return results + return { + '_type': 'playlist', + 'entries': entries, + 'title': show_name + ' ' + title, + 'description': description, + } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 647720c8a..da4193734 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -74,7 +74,7 @@ class InfoExtractor(object): "http", "https", "rtsp", "rtmp", "m3u8" or so. * preference Order number of this format. If this field is present and not None, the formats get sorted - by this field. + by this field, regardless of all other values. -1 for default (order by other properties), -2 or smaller for less than default. * quality Order number of the video quality of this @@ -252,6 +252,17 @@ class InfoExtractor(object): outf.write(webpage_bytes) content = webpage_bytes.decode(encoding, 'replace') + + if (u'<title>Access to this site is blocked' in content and + u'Websense' in content[:512]): + msg = u'Access to this webpage has been blocked by Websense filtering software in your network.' + blocked_iframe = self._html_search_regex( + r'