Merge branch 'sohu_fix' of https://github.com/yan12125/youtube-dl into yan12125-sohu_fix
authorSergey M․ <dstftw@gmail.com>
Tue, 17 Mar 2015 15:18:36 +0000 (21:18 +0600)
committerSergey M․ <dstftw@gmail.com>
Tue, 17 Mar 2015 15:18:36 +0000 (21:18 +0600)
58 files changed:
.travis.yml
AUTHORS
CONTRIBUTING.md
README.md
docs/supportedsites.md
test/test_YoutubeDL.py
test/test_all_urls.py
test/test_postprocessors.py [new file with mode: 0644]
test/test_subtitles.py
test/test_unicode_literals.py
test/test_utils.py
tox.ini
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/downloader/f4m.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/adultswim.py
youtube_dl/extractor/aftenposten.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/beatportpro.py [new file with mode: 0644]
youtube_dl/extractor/breakcom.py
youtube_dl/extractor/cloudy.py
youtube_dl/extractor/common.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/eagleplatform.py [new file with mode: 0644]
youtube_dl/extractor/eighttracks.py
youtube_dl/extractor/footyroom.py [new file with mode: 0644]
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/gazeta.py [new file with mode: 0644]
youtube_dl/extractor/generic.py
youtube_dl/extractor/globo.py
youtube_dl/extractor/jeuxvideo.py
youtube_dl/extractor/kanalplay.py [new file with mode: 0644]
youtube_dl/extractor/letv.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/orf.py
youtube_dl/extractor/pladform.py [new file with mode: 0644]
youtube_dl/extractor/primesharetv.py [new file with mode: 0644]
youtube_dl/extractor/rtve.py
youtube_dl/extractor/ssa.py [new file with mode: 0644]
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/vidme.py
youtube_dl/extractor/viewster.py [new file with mode: 0644]
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/yam.py
youtube_dl/extractor/yandexmusic.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py
youtube_dl/options.py
youtube_dl/postprocessor/__init__.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/postprocessor/metadatafromtitle.py [new file with mode: 0644]
youtube_dl/utils.py
youtube_dl/version.py

index fb34299fced3f24a7f4de265aadf3094299d6523..511bee64cdb8398640a6aa1f4159f5d1f5ce0d3e 100644 (file)
@@ -2,6 +2,7 @@ language: python
 python:
   - "2.6"
   - "2.7"
+  - "3.2"
   - "3.3"
   - "3.4"
 before_install:
diff --git a/AUTHORS b/AUTHORS
index 4674a5af3cf129b33c716f88f323038477110c05..872da60711fcda8aa55cb4c8f9f1a0d5cf7e3884 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -113,3 +113,6 @@ Robin de Rooij
 Ryan Schmidt
 Leslie P. Polzer
 Duncan Keall
+Alexander Mamay
+Devin J. Pohly
+Eduardo Ferro Aldama
index 351229f2106a62bd229fa3c8d5649c1a9e4b09c6..588b15bde7a3ba367c17d1fb3819a2070aeea9f0 100644 (file)
@@ -18,7 +18,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
 
 For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
 
-Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
+If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield rather large output; redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
 
 ###  Are you using the latest version?
 
index 5b9dd2cea25396acb09a28d7fcef463497be0088..4f9fc8174db8c4ced337bc8f33798f1cf8922b3d 100644 (file)
--- a/README.md
+++ b/README.md
@@ -167,7 +167,7 @@ which means you can modify it, redistribute it or use it however you like.
     --no-progress                    do not print progress bar
     --console-title                  display progress in console titlebar
     -v, --verbose                    print various debugging information
-    --dump-intermediate-pages        print downloaded pages to debug problems (very verbose)
+    --dump-pages                     print downloaded pages to debug problems (very verbose)
     --write-pages                    Write downloaded intermediary pages to files in the current directory to debug problems
     --print-traffic                  Display sent and read HTTP traffic
     -C, --call-home                  Contact the youtube-dl server for debugging.
@@ -228,6 +228,9 @@ which means you can modify it, redistribute it or use it however you like.
     --embed-subs                     embed subtitles in the video (only for mp4 videos)
     --embed-thumbnail                embed thumbnail in the audio as cover art
     --add-metadata                   write metadata to the video file
+    --metadata-from-title FORMAT     parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
+                                     parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s -
+                                     %(title)s" matches a title like "Coldplay - Paradise"
     --xattrs                         write metadata to the video file's xattrs (using dublin core and xdg standards)
     --fixup POLICY                   Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
                                      fix file if we can, warn otherwise)
@@ -404,6 +407,18 @@ A note on the service that they don't host the infringing content, but just link
 
 Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
 
+### How can I speed up work on my issue?
+
+(Also known as: Help, my important issue is not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do:
+
+First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel.
+
+Please read the [bug reporting instructions](#bugs) below. A lot of bug reports lack some of the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
+
+If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
+
+Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ... from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
+
 ### How can I detect whether a given URL is supported by youtube-dl?
 
 For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
@@ -503,6 +518,7 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
 From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
 
 ```python
+from __future__ import unicode_literals
 import youtube_dl
 
 ydl_opts = {}
@@ -515,6 +531,7 @@ Most likely, you'll want to use various options. For a list of what can be done,
 Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
 
 ```python
+from __future__ import unicode_literals
 import youtube_dl
 
 
@@ -572,7 +589,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
 
 For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
 
-Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
+If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield rather large output; redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
 
 ###  Are you using the latest version?
 
index 062cb3d626443e7f69059aaea10c3ec3a30a7f28..d6a1e67c625379c966e98a87f2f0989ec720afdf 100644 (file)
@@ -47,6 +47,7 @@
  - **Bandcamp**
  - **Bandcamp:album**
  - **bbc.co.uk**: BBC iPlayer
+ - **BeatportPro**
  - **Beeg**
  - **BehindKink**
  - **Bet**
  - **DRTV**
  - **Dump**
  - **dvtv**: http://video.aktualne.cz/
+ - **EaglePlatform**
  - **EbaumsWorld**
  - **EchoMsk**
  - **eHow**
  - **Firstpost**
  - **Flickr**
  - **Folketinget**: Folketinget (ft.dk; Danish parliament)
+ - **FootyRoom**
  - **Foxgay**
  - **FoxNews**
  - **france2.fr:generation-quoi**
  - **GameSpot**
  - **GameStar**
  - **Gametrailers**
+ - **Gazeta**
  - **GDCVault**
  - **generic**: Generic downloader that works on some sites
  - **GiantBomb**
  - **jpopsuki.tv**
  - **Jukebox**
  - **Kaltura**
+ - **KanalPlay**: Kanal 5/9/11 Play
  - **Kankan**
  - **Karaoketv**
  - **keek**
  - **Ooyala**
  - **OpenFilm**
  - **orf:fm4**: radio FM4
+ - **orf:iptv**: iptv.ORF.at
  - **orf:oe1**: Radio Österreich 1
  - **orf:tvthek**: ORF TVthek
  - **parliamentlive.tv**: UK parliament videos
  - **PBS**
  - **Phoenix**
  - **Photobucket**
+ - **Pladform**
  - **PlanetaPlay**
  - **play.fm**
  - **played.to**
  - **Playvid**
+ - **Playwire**
  - **plus.google**: Google Plus
  - **pluzz.francetv.fr**
  - **podomatic**
  - **SportBox**
  - **SportDeutschland**
  - **SRMediathek**: Saarländischer Rundfunk
+ - **SSA**
  - **stanfordoc**: Stanford Open ClassRoom
  - **Steam**
  - **streamcloud.eu**
  - **Vidzi**
  - **vier**
  - **vier:videos**
+ - **Viewster**
  - **viki**
  - **vimeo**
  - **vimeo:album**
  - **XXXYMovies**
  - **Yahoo**: Yahoo screen and movies
  - **Yam**
+ - **yandexmusic:album**: Яндекс.Музыка - Альбом
+ - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
+ - **yandexmusic:track**: Яндекс.Музыка - Трек
  - **YesJapan**
  - **Ynet**
  - **YouJizz**
index 055e4255583d500805facc4fc59e296170e876e4..db8a47d2d0d9e18f46e78b43068196efadc0f943 100644 (file)
@@ -15,6 +15,8 @@ from youtube_dl import YoutubeDL
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.postprocessor.common import PostProcessor
 
+TEST_URL = 'http://localhost/sample.mp4'
+
 
 class YDL(FakeYDL):
     def __init__(self, *args, **kwargs):
@@ -46,8 +48,8 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = True
         formats = [
-            {'ext': 'webm', 'height': 460, 'url': 'x'},
-            {'ext': 'mp4', 'height': 460, 'url': 'y'},
+            {'ext': 'webm', 'height': 460, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 460, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
         yie = YoutubeIE(ydl)
@@ -60,8 +62,8 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = True
         formats = [
-            {'ext': 'webm', 'height': 720, 'url': 'a'},
-            {'ext': 'mp4', 'height': 1080, 'url': 'b'},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
         ]
         info_dict['formats'] = formats
         yie = YoutubeIE(ydl)
@@ -74,9 +76,9 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = False
         formats = [
-            {'ext': 'webm', 'height': 720, 'url': '_'},
-            {'ext': 'mp4', 'height': 720, 'url': '_'},
-            {'ext': 'flv', 'height': 720, 'url': '_'},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 720, 'url': TEST_URL},
+            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
         ]
         info_dict['formats'] = formats
         yie = YoutubeIE(ydl)
@@ -88,8 +90,8 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = False
         formats = [
-            {'ext': 'flv', 'height': 720, 'url': '_'},
-            {'ext': 'webm', 'height': 720, 'url': '_'},
+            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
         ]
         info_dict['formats'] = formats
         yie = YoutubeIE(ydl)
@@ -133,10 +135,10 @@ class TestFormatSelection(unittest.TestCase):
 
     def test_format_selection(self):
         formats = [
-            {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
-            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
-            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
-            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
+            {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
+            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
+            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
@@ -167,10 +169,10 @@ class TestFormatSelection(unittest.TestCase):
 
     def test_format_selection_audio(self):
         formats = [
-            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
+            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
@@ -185,8 +187,8 @@ class TestFormatSelection(unittest.TestCase):
         self.assertEqual(downloaded['format_id'], 'audio-low')
 
         formats = [
-            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
-            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
+            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
@@ -228,9 +230,9 @@ class TestFormatSelection(unittest.TestCase):
 
     def test_format_selection_video(self):
         formats = [
-            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
-            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
-            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
+            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
index e66264b4b16147cae6e41d329bf07dcc31ff83e4..6ae168b7f472938a3a69344fac123e047478ee73 100644 (file)
@@ -104,11 +104,11 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch(':tds', ['ComedyCentralShows'])
 
     def test_vimeo_matching(self):
-        self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
-        self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
-        self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
-        self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
-        self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
+        self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
+        self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
+        self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
+        self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
+        self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
         self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
 
     # https://github.com/rg3/youtube-dl/issues/1930
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
new file mode 100644 (file)
index 0000000..addb69d
--- /dev/null
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.postprocessor import MetadataFromTitlePP
+
+
+class TestMetadataFromTitle(unittest.TestCase):
+    def test_format_to_regex(self):
+        pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
+        self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
index 3f2d8a2ba74e6b4f04d4159a64deb1f69f9d105b..891ee620b1f2627dd6991e0cccfbc58b59fb6a95 100644 (file)
@@ -26,6 +26,7 @@ from youtube_dl.extractor import (
     VikiIE,
     ThePlatformIE,
     RTVEALaCartaIE,
+    FunnyOrDieIE,
 )
 
 
@@ -320,5 +321,17 @@ class TestRtveSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
 
 
+class TestFunnyOrDieSubtitles(BaseTestSubtitles):
+    url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
+    IE = FunnyOrDieIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
+
+
 if __name__ == '__main__':
     unittest.main()
index 7f816698e7b2e20bc982b0eeb9638885edf19b48..6c1b7ec915c60321e62c7c44728f5486921e772f 100644 (file)
@@ -17,13 +17,22 @@ IGNORED_FILES = [
     'buildserver.py',
 ]
 
+IGNORED_DIRS = [
+    '.git',
+    '.tox',
+]
 
 from test.helper import assertRegexpMatches
 
 
 class TestUnicodeLiterals(unittest.TestCase):
     def test_all_files(self):
-        for dirpath, _, filenames in os.walk(rootDir):
+        for dirpath, dirnames, filenames in os.walk(rootDir):
+            for ignore_dir in IGNORED_DIRS:
+                if ignore_dir in dirnames:
+                    # If we remove the directory from dirnames os.walk won't
+                    # recurse into it
+                    dirnames.remove(ignore_dir)
             for basename in filenames:
                 if not basename.endswith('.py'):
                     continue
index e02069c4dfe3c9b9b4aec5d1838ac726776dbc66..4f0ffd4824506f2f171e8b0e374d034d6ae8ccea 100644 (file)
@@ -38,6 +38,7 @@ from youtube_dl.utils import (
     parse_iso8601,
     read_batch_urls,
     sanitize_filename,
+    sanitize_path,
     shell_quote,
     smuggle_url,
     str_to_int,
@@ -132,6 +133,42 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
         self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
 
+    def test_sanitize_path(self):
+        if sys.platform != 'win32':
+            return
+
+        self.assertEqual(sanitize_path('abc'), 'abc')
+        self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
+        self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
+        self.assertEqual(sanitize_path('abc|def'), 'abc#def')
+        self.assertEqual(sanitize_path('<>:"|?*'), '#######')
+        self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
+        self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
+
+        self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
+
+        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
+        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+
+        self.assertEqual(
+            sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
+            'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
+
+        self.assertEqual(
+            sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
+            'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
+        self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
+        self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
+        self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
+
+        self.assertEqual(sanitize_path('../abc'), '..\\abc')
+        self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
+        self.assertEqual(sanitize_path('./abc'), 'abc')
+        self.assertEqual(sanitize_path('./../abc'), '..\\abc')
+
     def test_ordered_set(self):
         self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
         self.assertEqual(orderedSet([]), [])
diff --git a/tox.ini b/tox.ini
index ed01e3386d8efcaff7bb846ac1f83c3d62763fb0..00c6e00e3b72c4de21dc725173e3bb60ea5fa55b 100644 (file)
--- a/tox.ini
+++ b/tox.ini
@@ -1,8 +1,11 @@
 [tox]
-envlist = py26,py27,py33
+envlist = py26,py27,py33,py34
 [testenv]
 deps =
    nose
    coverage
-commands = nosetests --verbose {posargs:test}  # --with-coverage --cover-package=youtube_dl --cover-html
+defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
+    --exclude test_subtitles.py --exclude test_write_annotations.py
+    --exclude test_youtube_lists.py
+commands = nosetests --verbose {posargs:{[testenv]defaultargs}}  # --with-coverage --cover-package=youtube_dl --cover-html
                                                # test.test_download:TestDownload.test_NowVideo
index df2aebb59f836e7255ccaa6fcce545154d58b65d..5a83bc95662b92e786ed1088155321645eadea98 100755 (executable)
@@ -61,6 +61,7 @@ from .utils import (
     render_table,
     SameFileError,
     sanitize_filename,
+    sanitize_path,
     std_headers,
     subtitles_filename,
     takewhile_inclusive,
@@ -322,6 +323,11 @@ class YoutubeDL(object):
                 'Set the LC_ALL environment variable to fix this.')
             self.params['restrictfilenames'] = True
 
+        if isinstance(params.get('outtmpl'), bytes):
+            self.report_warning(
+                'Parameter outtmpl is bytes, but should be a unicode string. '
+                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
+
         if '%(stitle)s' in self.params.get('outtmpl', ''):
             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 
@@ -562,7 +568,7 @@ class YoutubeDL(object):
                                  if v is not None)
             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 
-            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+            outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
             tmpl = compat_expanduser(outtmpl)
             filename = tmpl % template_dict
             # Temporary fix for #4787
@@ -629,7 +635,7 @@ class YoutubeDL(object):
         Returns a list with a dictionary for each video we find.
         If 'download', also downloads the videos.
         extra_info is a dict containing the extra values to add to each result
-         '''
+        '''
 
         if ie_key:
             ies = [self.get_info_extractor(ie_key)]
@@ -1085,8 +1091,7 @@ class YoutubeDL(object):
         if req_format is None:
             req_format = 'best'
         formats_to_download = []
-        # The -1 is for supporting YoutubeIE
-        if req_format in ('-1', 'all'):
+        if req_format == 'all':
             formats_to_download = formats
         else:
             for rfstr in req_format.split(','):
@@ -1261,7 +1266,7 @@ class YoutubeDL(object):
             return
 
         try:
-            dn = os.path.dirname(encodeFilename(filename))
+            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
             if dn and not os.path.exists(dn):
                 os.makedirs(dn)
         except (OSError, IOError) as err:
index a08ddd67097162989e3c5c82502d5654bbc4b0e0..852b2fc3db24b85138b44e7a5cf8f4338d787ce7 100644 (file)
@@ -213,6 +213,11 @@ def _real_main(argv=None):
     # PostProcessors
     postprocessors = []
     # Add the metadata pp first, the other pps will copy it
+    if opts.metafromtitle:
+        postprocessors.append({
+            'key': 'MetadataFromTitle',
+            'titleformat': opts.metafromtitle
+        })
     if opts.addmetadata:
         postprocessors.append({'key': 'FFmpegMetadata'})
     if opts.extractaudio:
index 3dc796faaf038c00383089274c0005382977a431..4ab000d6732cca5f71e8b8ab1d99dd5bfb49d57e 100644 (file)
@@ -281,7 +281,7 @@ class F4mFD(FileDownloader):
             boot_info = self._get_bootstrap_from_url(bootstrap_url)
         else:
             bootstrap_url = None
-            bootstrap = base64.b64decode(node.text)
+            bootstrap = base64.b64decode(node.text.encode('ascii'))
             boot_info = read_bootstrap_info(bootstrap)
         return (boot_info, bootstrap_url)
 
@@ -308,7 +308,7 @@ class F4mFD(FileDownloader):
         live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
-            metadata = base64.b64decode(metadata_node.text)
+            metadata = base64.b64decode(metadata_node.text.encode('ascii'))
         else:
             metadata = None
 
index 5ca534cdf2655af31b7b647ba00e840379403222..bceed92e1d15b7fca0271bad740270814f72ff0b 100644 (file)
@@ -37,6 +37,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
+from .beatportpro import BeatportProIE
 from .bet import BetIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
@@ -116,6 +117,7 @@ from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
 from .divxstage import DivxStageIE
 from .dropbox import DropboxIE
+from .eagleplatform import EaglePlatformIE
 from .ebaumsworld import EbaumsWorldIE
 from .echomsk import EchoMskIE
 from .ehow import EHowIE
@@ -150,6 +152,7 @@ from .fktv import (
 )
 from .flickr import FlickrIE
 from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
 from .fourtube import FourTubeIE
 from .foxgay import FoxgayIE
 from .foxnews import FoxNewsIE
@@ -174,6 +177,7 @@ from .gameone import (
 from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
+from .gazeta import GazetaIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
 from .giantbomb import GiantBombIE
@@ -228,6 +232,7 @@ from .jove import JoveIE
 from .jukebox import JukeboxIE
 from .jpopsukitv import JpopsukiIE
 from .kaltura import KalturaIE
+from .kanalplay import KanalPlayIE
 from .kankan import KankanIE
 from .karaoketv import KaraoketvIE
 from .keezmovies import KeezMoviesIE
@@ -354,6 +359,7 @@ from .orf import (
     ORFTVthekIE,
     ORFOE1IE,
     ORFFM4IE,
+    ORFIPTVIE,
 )
 from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
@@ -361,6 +367,7 @@ from .pbs import PBSIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .planetaplay import PlanetaPlayIE
+from .pladform import PladformIE
 from .played import PlayedIE
 from .playfm import PlayFMIE
 from .playvid import PlayvidIE
@@ -373,6 +380,7 @@ from .pornhub import (
 )
 from .pornotube import PornotubeIE
 from .pornoxo import PornoXOIE
+from .primesharetv import PrimeShareTVIE
 from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .puls4 import Puls4IE
@@ -398,7 +406,7 @@ from .rtlnow import RTLnowIE
 from .rtl2 import RTL2IE
 from .rtp import RTPIE
 from .rts import RTSIE
-from .rtve import RTVEALaCartaIE, RTVELiveIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
 from .ruhd import RUHDIE
 from .rutube import (
     RutubeIE,
@@ -456,6 +464,7 @@ from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
 from .srmediathek import SRMediathekIE
+from .ssa import SSAIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
@@ -551,6 +560,7 @@ from .videoweed import VideoWeedIE
 from .vidme import VidmeIE
 from .vidzi import VidziIE
 from .vier import VierIE, VierVideosIE
+from .viewster import ViewsterIE
 from .vimeo import (
     VimeoIE,
     VimeoAlbumIE,
@@ -607,6 +617,11 @@ from .yahoo import (
     YahooSearchIE,
 )
 from .yam import YamIE
+from .yandexmusic import (
+    YandexMusicTrackIE,
+    YandexMusicAlbumIE,
+    YandexMusicPlaylistIE,
+)
 from .yesjapan import YesJapanIE
 from .ynet import YnetIE
 from .youjizz import YouJizzIE
index 34b8b01157bb930937f6f69c4950d8d01c39ed6e..39335b8272295dbf2b640881cd29a6f5b99acaba 100644 (file)
@@ -2,13 +2,12 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    xpath_text,
     float_or_none,
+    xpath_text,
 )
 
 
@@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor):
             'title': 'American Dad - Putting Francine Out of Business',
             'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
         },
+    }, {
+        'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
+        'playlist': [
+            {
+                'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
+                'info_dict': {
+                    'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
+                    'ext': 'flv',
+                    'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+                    'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+                },
+            }
+        ],
+        'info_dict': {
+            'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+            'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+            'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+        },
     }]
 
     @staticmethod
@@ -80,6 +97,7 @@ class AdultSwimIE(InfoExtractor):
             for video in collection.get('videos'):
                 if video.get('slug') == slug:
                     return collection, video
+        return None, None
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -90,28 +108,30 @@ class AdultSwimIE(InfoExtractor):
         webpage = self._download_webpage(url, episode_path)
 
         # Extract the value of `bootstrappedData` from the Javascript in the page.
-        bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
-
-        try:
-            bootstrappedData = json.loads(bootstrappedDataJS)
-        except ValueError as ve:
-            errmsg = '%s: Failed to parse JSON ' % episode_path
-            raise ExtractorError(errmsg, cause=ve)
+        bootstrapped_data = self._parse_json(self._search_regex(
+            r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
 
         # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
         # NOTE: We are only downloading one video (the current one) not the playlist
         if is_playlist:
-            collections = bootstrappedData['playlists']['collections']
+            collections = bootstrapped_data['playlists']['collections']
             collection = self.find_collection_by_linkURL(collections, show_path)
             video_info = self.find_video_info(collection, episode_path)
 
             show_title = video_info['showTitle']
             segment_ids = [video_info['videoPlaybackID']]
         else:
-            collections = bootstrappedData['show']['collections']
+            collections = bootstrapped_data['show']['collections']
             collection, video_info = self.find_collection_containing_video(collections, episode_path)
 
-            show = bootstrappedData['show']
+            # Video wasn't found in the collections, let's try `slugged_video`.
+            if video_info is None:
+                if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
+                    video_info = bootstrapped_data['slugged_video']
+                else:
+                    raise ExtractorError('Unable to find video info')
+
+            show = bootstrapped_data['show']
             show_title = show['title']
             segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
 
index 2b257ede7457e2844250f3034f6a8031d31624a9..e15c015fbafd466aaba089bef979843a397f1ab7 100644 (file)
@@ -14,10 +14,10 @@ from ..utils import (
 
 
 class AftenpostenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
+    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
+        'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
         'md5': 'fd828cd29774a729bf4d4425fe192972',
         'info_dict': {
             'id': '21039',
@@ -30,12 +30,7 @@ class AftenpostenIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        video_id = self._html_search_regex(
-            r'data-xs-id="(\d+)"', webpage, 'video id')
+        video_id = self._match_id(url)
 
         data = self._download_xml(
             'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
index 783b53e23035a7bd3f3feac628ff2de8daefbea5..6a35ea463edcafe3b9d7db4c53b9bf0c53198fd0 100644 (file)
@@ -50,6 +50,9 @@ class ARDMediathekIE(InfoExtractor):
         if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
             raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
 
+        if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
+            raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
+
         if re.search(r'[\?&]rss($|[=&])', url):
             doc = parse_xml(webpage)
             if doc.tag == 'rss':
index 929dd3cc5550beb1b2da8874763084b5146d2f33..8273bd6c9ae3cdff82052c8f63efc68be97561b3 100644 (file)
@@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
 
             formats.append(format)
 
+        self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
         info_dict['formats'] = formats
diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py
new file mode 100644 (file)
index 0000000..3c7775d
--- /dev/null
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class BeatportProIE(InfoExtractor):
+    _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
+        'md5': 'b3c34d8639a2f6a7f734382358478887',
+        'info_dict': {
+            'id': '5379371',
+            'display_id': 'synesthesia-original-mix',
+            'ext': 'mp4',
+            'title': 'Froxic - Synesthesia (Original Mix)',
+        },
+    }, {
+        'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
+        'md5': 'e44c3025dfa38c6577fbaeb43da43514',
+        'info_dict': {
+            'id': '3756896',
+            'display_id': 'love-and-war-original-mix',
+            'ext': 'mp3',
+            'title': 'Wolfgang Gartner - Love & War (Original Mix)',
+        },
+    }, {
+        'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
+        'md5': 'a1fd8e8046de3950fd039304c186c05f',
+        'info_dict': {
+            'id': '4991738',
+            'display_id': 'birds-original-mix',
+            'ext': 'mp4',
+            'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        track_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        playables = self._parse_json(
+            self._search_regex(
+                r'window\.Playables\s*=\s*({.+?});', webpage,
+                'playables info', flags=re.DOTALL),
+            track_id)
+
+        track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
+
+        title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
+        if track['mix']:
+            title += ' (' + track['mix'] + ')'
+
+        formats = []
+        for ext, info in track['preview'].items():
+            if not info['url']:
+                continue
+            fmt = {
+                'url': info['url'],
+                'ext': ext,
+                'format_id': ext,
+                'vcodec': 'none',
+            }
+            if ext == 'mp3':
+                fmt['preference'] = 0
+                fmt['acodec'] = 'mp3'
+                fmt['abr'] = 96
+                fmt['asr'] = 44100
+            elif ext == 'mp4':
+                fmt['preference'] = 1
+                fmt['acodec'] = 'aac'
+                fmt['abr'] = 96
+                fmt['asr'] = 44100
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        images = []
+        for name, info in track['images'].items():
+            image_url = info.get('url')
+            if name == 'dynamic' or not image_url:
+                continue
+            image = {
+                'id': name,
+                'url': image_url,
+                'height': int_or_none(info.get('height')),
+                'width': int_or_none(info.get('width')),
+            }
+            images.append(image)
+
+        return {
+            'id': compat_str(track.get('id')) or track_id,
+            'display_id': track.get('slug') or display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnails': images,
+        }
index 4bcc897c95229ea0ee509fe53443d355309a66aa..809287d144ca7d629bf42bad7ac4e213a323e6dd 100644 (file)
@@ -41,7 +41,7 @@ class BreakIE(InfoExtractor):
             'tbr': media['bitRate'],
             'width': media['width'],
             'height': media['height'],
-        } for media in info['media']]
+        } for media in info['media'] if media.get('mediaPurpose') == 'play']
 
         if not formats:
             formats.append({
index abf8cc280b3d6f1aeefe8219a1fd0ea5d1224be1..0fa720ee8745cfc728b4413b41888e17787fb5db 100644 (file)
@@ -105,6 +105,7 @@ class CloudyIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         file_key = self._search_regex(
-            r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
+            [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
+            webpage, 'file_key')
 
         return self._extract_video(video_host, video_id, file_key)
index cf39c0c21ee570f7277aacc7ca560a1509afe855..e5245ec3f29eb914b4a311bdf5f3c735e32d2f61 100644 (file)
@@ -839,7 +839,7 @@ class InfoExtractor(object):
                               m3u8_id=None):
 
         formats = [{
-            'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
+            'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
             'url': m3u8_url,
             'ext': ext,
             'protocol': 'm3u8',
@@ -883,8 +883,13 @@ class InfoExtractor(object):
                     formats.append({'url': format_url(line)})
                     continue
                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
+                format_id = []
+                if m3u8_id:
+                    format_id.append(m3u8_id)
+                last_media_name = last_media.get('NAME') if last_media else None
+                format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
                 f = {
-                    'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
+                    'format_id': '-'.join(format_id),
                     'url': format_url(line.strip()),
                     'tbr': tbr,
                     'ext': ext,
@@ -1057,6 +1062,9 @@ class InfoExtractor(object):
     def _get_automatic_captions(self, *args, **kwargs):
         raise NotImplementedError("This method must be implemented by subclasses")
 
+    def _subtitles_timecode(self, seconds):
+        return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
index 42b20a46ddefc1e4a7e66aacd0d959a1e062618f..4f67c3aacc5dec5410b76d08728eecaf429f222b 100644 (file)
@@ -46,13 +46,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 
     _TESTS = [
         {
-            'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
-            'md5': '392c4b85a60a90dc4792da41ce3144eb',
+            'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
+            'md5': '2137c41a8e78554bb09225b8eb322406',
             'info_dict': {
-                'id': 'x33vw9',
+                'id': 'x2iuewm',
                 'ext': 'mp4',
-                'uploader': 'Amphora Alex and Van .',
-                'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
+                'uploader': 'IGN',
+                'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
             }
         },
         # Vevo video
diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py
new file mode 100644 (file)
index 0000000..7173371
--- /dev/null
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
+
+
+class EaglePlatformIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    (?:
+                        eagleplatform:(?P<custom_host>[^/]+):|
+                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
+                    )
+                    (?P<id>\d+)
+                '''
+    _TESTS = [{
+        # http://lenta.ru/news/2015/03/06/navalny/
+        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
+        'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
+        'info_dict': {
+            'id': '227304',
+            'ext': 'mp4',
+            'title': 'Навальный вышел на свободу',
+            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 87,
+            'view_count': int,
+            'age_limit': 0,
+        },
+    }, {
+        # http://muz-tv.ru/play/7129/
+        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
+        'url': 'eagleplatform:media.clipyou.ru:12820',
+        'md5': '6c2ebeab03b739597ce8d86339d5a905',
+        'info_dict': {
+            'id': '12820',
+            'ext': 'mp4',
+            'title': "'O Sole Mio",
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 216,
+            'view_count': int,
+        },
+    }]
+
+    def _handle_error(self, response):
+        status = int_or_none(response.get('status', 200))
+        if status != 200:
+            raise ExtractorError(' '.join(response['errors']), expected=True)
+
+    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
+        response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
+        self._handle_error(response)
+        return response
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
+
+        player_data = self._download_json(
+            'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
+
+        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
+
+        title = media['title']
+        description = media.get('description')
+        thumbnail = media.get('snapshot')
+        duration = int_or_none(media.get('duration'))
+        view_count = int_or_none(media.get('views'))
+
+        age_restriction = media.get('age_restriction')
+        age_limit = None
+        if age_restriction:
+            age_limit = 0 if age_restriction == 'allow_all' else 18
+
+        m3u8_data = self._download_json(
+            media['sources']['secure_m3u8']['auto'],
+            video_id, 'Downloading m3u8 JSON')
+
+        formats = self._extract_m3u8_formats(
+            m3u8_data['data'][0], video_id,
+            'mp4', entry_protocol='m3u8_native')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'view_count': view_count,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
index fb5dbbe2b0c7d9bd15b87426e446ce73f903a6eb..0b61ea0ba60218043156d4f90680ff0348e827c7 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 import json
 import random
-import re
 
 from .common import InfoExtractor
 from ..compat import (
@@ -103,20 +102,23 @@ class EightTracksIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
 
-        json_like = self._search_regex(
-            r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
-        data = json.loads(json_like)
+        data = self._parse_json(
+            self._search_regex(
+                r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
+            playlist_id)
 
         session = str(random.randint(0, 1000000000))
         mix_id = data['id']
         track_count = data['tracks_count']
         duration = data['duration']
         avg_song_duration = float(duration) / track_count
+        # duration is sometimes negative, use predefined avg duration
+        if avg_song_duration <= 0:
+            avg_song_duration = 300
         first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
         next_url = first_url
         entries = []
diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py
new file mode 100644 (file)
index 0000000..2b4691a
--- /dev/null
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FootyRoomIE(InfoExtractor):
+    _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
+        'info_dict': {
+            'id': 'schalke-04-0-2-real-madrid-2015-02',
+            'title': 'Schalke 04 0 – 2 Real Madrid',
+        },
+        'playlist_count': 3,
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        playlist = self._parse_json(
+            self._search_regex(
+                r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
+            playlist_id)
+
+        playlist_title = self._og_search_title(webpage)
+
+        entries = []
+        for video in playlist:
+            payload = video.get('payload')
+            if not payload:
+                continue
+            playwire_url = self._search_regex(
+                r'data-config="([^"]+)"', payload,
+                'playwire url', default=None)
+            if playwire_url:
+                entries.append(self.url_result(playwire_url, 'Playwire'))
+
+        return self.playlist_result(entries, playlist_id, playlist_title)
index a49fc1151cf324f5e4b61cbd4f1d586718410626..dd87257c465983dcda30a6faf5dbd7bc0950560c 100644 (file)
@@ -50,7 +50,6 @@ class FunnyOrDieIE(InfoExtractor):
         bitrates.sort()
 
         formats = []
-
         for bitrate in bitrates:
             for link in links:
                 formats.append({
@@ -59,6 +58,13 @@ class FunnyOrDieIE(InfoExtractor):
                     'vbr': bitrate,
                 })
 
+        subtitles = {}
+        for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
+            subtitles[src_lang] = [{
+                'ext': src.split('/')[-1],
+                'url': 'http://www.funnyordie.com%s' % src,
+            }]
+
         post_json = self._search_regex(
             r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
         post = json.loads(post_json)
@@ -69,4 +75,5 @@ class FunnyOrDieIE(InfoExtractor):
             'description': post.get('description'),
             'thumbnail': post.get('picture'),
             'formats': formats,
+            'subtitles': subtitles,
         }
diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py
new file mode 100644 (file)
index 0000000..ea32b62
--- /dev/null
@@ -0,0 +1,38 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class GazetaIE(InfoExtractor):
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+    _TESTS = [{
+        'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
+        'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
+        'info_dict': {
+            'id': '205566',
+            'ext': 'mp4',
+            'title': '«70–80 процентов гражданских в Донецке на грани голода»',
+            'description': 'md5:38617526050bd17b234728e7f9620a71',
+            'thumbnail': 're:^https?://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        display_id = mobj.group('id')
+        embed_url = '%s?p=embed' % mobj.group('url')
+        embed_page = self._download_webpage(
+            embed_url, display_id, 'Downloading embed page')
+
+        video_id = self._search_regex(
+            r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')
+
+        return self.url_result(
+            'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')
index 5dc53685cf36f0a9adf8e1896da7db80ab0ba385..4e6927b08b032718e09fdbe0b35595456d587156 100644 (file)
@@ -570,6 +570,45 @@ class GenericIE(InfoExtractor):
                 'title': 'John Carlson Postgame 2/25/15',
             },
         },
+        # Eagle.Platform embed (generic URL)
+        {
+            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+            'info_dict': {
+                'id': '227304',
+                'ext': 'mp4',
+                'title': 'Навальный вышел на свободу',
+                'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 87,
+                'view_count': int,
+                'age_limit': 0,
+            },
+        },
+        # ClipYou (Eagle.Platform) embed (custom URL)
+        {
+            'url': 'http://muz-tv.ru/play/7129/',
+            'info_dict': {
+                'id': '12820',
+                'ext': 'mp4',
+                'title': "'O Sole Mio",
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 216,
+                'view_count': int,
+            },
+        },
+        # Pladform embed
+        {
+            'url': 'http://muz-tv.ru/kinozal/view/7400/',
+            'info_dict': {
+                'id': '100183293',
+                'ext': 'mp4',
+                'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+                'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 694,
+                'age_limit': 0,
+            },
+        },
         # RSS feed with enclosure
         {
             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
@@ -1155,6 +1194,24 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
 
+        # Look for Eagle.Platform embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'EaglePlatform')
+
+        # Look for ClipYou (uses Eagle.Platform) embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+        if mobj is not None:
+            return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
+
+        # Look for Pladform embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Pladform')
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
index 29638a1948ff1230403f313f1c7725ab69224434..8a95793cae07734e67340bf49db088cdb043d1cb 100644 (file)
@@ -20,7 +20,7 @@ class GloboIE(InfoExtractor):
     _VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
 
     _API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
-    _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s'
+    _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
 
     _VIDEOID_REGEXES = [
         r'\bdata-video-id="(\d+)"',
index 8094cc2e487f2880a66178d0a07c97a2ef9432f5..d0720ff561c16e8c0816c5ff7ab333e54c297dbc 100644 (file)
@@ -2,7 +2,6 @@
 
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
@@ -15,10 +14,10 @@ class JeuxVideoIE(InfoExtractor):
         'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
         'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
         'info_dict': {
-            'id': '5182',
+            'id': '114765',
             'ext': 'mp4',
-            'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
-            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
+            'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
+            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
         },
     }
 
@@ -26,26 +25,29 @@ class JeuxVideoIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         title = mobj.group(1)
         webpage = self._download_webpage(url, title)
-        xml_link = self._html_search_regex(
-            r'<param name="flashvars" value="config=(.*?)" />',
+        title = self._html_search_meta('name', webpage)
+        config_url = self._html_search_regex(
+            r'data-src="(/contenu/medias/video.php.*?)"',
             webpage, 'config URL')
+        config_url = 'http://www.jeuxvideo.com' + config_url
 
         video_id = self._search_regex(
-            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
-            xml_link, 'video ID')
+            r'id=(\d+)',
+            config_url, 'video ID')
 
-        config = self._download_xml(
-            xml_link, title, 'Downloading XML config')
-        info_json = config.find('format.json').text
-        info = json.loads(info_json)['versions'][0]
+        config = self._download_json(
+            config_url, title, 'Downloading JSON config')
 
-        video_url = 'http://video720.jeuxvideo.com/' + info['file']
+        formats = [{
+            'url': source['file'],
+            'format_id': source['label'],
+            'resolution': source['label'],
+        } for source in reversed(config['sources'])]
 
         return {
             'id': video_id,
-            'title': config.find('titre_video').text,
-            'ext': 'mp4',
-            'url': video_url,
+            'title': title,
+            'formats': formats,
             'description': self._og_search_description(webpage),
-            'thumbnail': config.find('image').text,
+            'thumbnail': config.get('image'),
         }
diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py
new file mode 100644 (file)
index 0000000..2bb0780
--- /dev/null
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+)
+
+
+class KanalPlayIE(InfoExtractor):
+    IE_DESC = 'Kanal 5/9/11 Play'
+    _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
+        'info_dict': {
+            'id': '3270012277',
+            'ext': 'flv',
+            'title': 'Saknar både dusch och avlopp',
+            'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
+            'duration': 2636.36,
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
+        'only_matching': True,
+    }]
+
+    def _fix_subtitles(self, subs):
+        return '\r\n\r\n'.join(
+            '%s\r\n%s --> %s\r\n%s'
+            % (
+                num,
+                self._subtitles_timecode(item['startMillis'] / 1000.0),
+                self._subtitles_timecode(item['endMillis'] / 1000.0),
+                item['text'],
+            ) for num, item in enumerate(subs, 1))
+
+    def _get_subtitles(self, channel_id, video_id):
+        subs = self._download_json(
+            'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
+            video_id, 'Downloading subtitles JSON', fatal=False)
+        return {'se': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        channel_id = mobj.group('channel_id')
+
+        video = self._download_json(
+            'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
+            video_id)
+
+        reasons_for_no_streams = video.get('reasonsForNoStreams')
+        if reasons_for_no_streams:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
+                expected=True)
+
+        title = video['title']
+        description = video.get('description')
+        duration = float_or_none(video.get('length'), 1000)
+        thumbnail = video.get('posterUrl')
+
+        stream_base_url = video['streamBaseUrl']
+
+        formats = [{
+            'url': stream_base_url,
+            'play_path': stream['source'],
+            'ext': 'flv',
+            'tbr': float_or_none(stream.get('bitrate'), 1000),
+            'rtmp_real_time': True,
+        } for stream in video['streams']]
+        self._sort_formats(formats)
+
+        subtitles = {}
+        if video.get('hasSubtitle'):
+            subtitles = self.extract_subtitles(channel_id, video_id)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
index 85eee141b119519e9a6aac9a2fd8bfb1e05419a0..1484ac0d267697dceb34c9e406e3a26b26a37f54 100644 (file)
@@ -88,12 +88,13 @@ class LetvIE(InfoExtractor):
         play_json_req = compat_urllib_request.Request(
             'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
         )
-        play_json_req.add_header(
-            'Ytdl-request-proxy',
-            self._downloader.params.get('cn_verification_proxy'))
+        cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+        if cn_verification_proxy:
+            play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
         play_json = self._download_json(
             play_json_req,
-            media_id, 'playJson data')
+            media_id, 'Downloading playJson data')
 
         # Check for errors
         playstatus = play_json['playstatus']
index 3642089f7802238d77ec5c18e4f96b5cb21e3d72..2467f8bdd35304a57cc6bd951fa14fd32a25cd4c 100644 (file)
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 import re
 import json
+import itertools
 
 from .common import InfoExtractor
 from ..compat import (
@@ -40,6 +41,13 @@ class LivestreamIE(InfoExtractor):
             'id': '2245590',
         },
         'playlist_mincount': 4,
+    }, {
+        'url': 'http://new.livestream.com/chess24/tatasteelchess',
+        'info_dict': {
+            'title': 'Tata Steel Chess',
+            'id': '3705884',
+        },
+        'playlist_mincount': 60,
     }, {
         'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
         'only_matching': True,
@@ -117,6 +125,30 @@ class LivestreamIE(InfoExtractor):
             'view_count': video_data.get('views'),
         }
 
+    def _extract_event(self, info):
+        """Return a playlist result whose entries are read lazily from the event feed."""
+        event_id = compat_str(info['id'])
+        account_id = compat_str(info['owner_account_id'])
+        feed_url = (
+            'https://new.livestream.com/api/accounts/{account}/events/{event}/'
+            'feed.json'.format(account=account_id, event=event_id))
+
+        def _iter_entries():
+            # The first request hits the plain feed URL; every later request
+            # is parameterised with the id of the last video already seen
+            page_url = feed_url
+            page_num = 1
+            while True:
+                feed = self._download_json(
+                    page_url, event_id, 'Downloading page {0}'.format(page_num))
+                page_videos = [
+                    item['data'] for item in feed['data']
+                    if item['type'] == 'video']
+                # A page without any video items ends the pagination
+                if not page_videos:
+                    return
+                for video_data in page_videos:
+                    yield self._extract_video_info(video_data)
+                page_url = '{root}?&id={id}&newer=-1&type=video'.format(
+                    root=feed_url, id=page_videos[-1]['id'])
+                page_num += 1
+
+        return self.playlist_result(_iter_entries(), event_id, info['full_name'])
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
@@ -144,14 +176,13 @@ class LivestreamIE(InfoExtractor):
                 result = result and compat_str(vdata['data']['id']) == vid
             return result
 
-        videos = [self._extract_video_info(video_data['data'])
-                  for video_data in info['feed']['data']
-                  if is_relevant(video_data, video_id)]
         if video_id is None:
             # This is an event page:
-            return self.playlist_result(
-                videos, '%s' % info['id'], info['full_name'])
+            return self._extract_event(info)
         else:
+            videos = [self._extract_video_info(video_data['data'])
+                      for video_data in info['feed']['data']
+                      if is_relevant(video_data, video_id)]
             if not videos:
                 raise ExtractorError('Cannot find video %s' % video_id)
             return videos[0]
index 4c18904169d3f69a0bf7e95fb21d98218bca7e91..7fb4e57dfb81029c0fc46dbab83ced5b2fbc766b 100644 (file)
@@ -41,7 +41,7 @@ class NiconicoIE(InfoExtractor):
         },
     }
 
-    _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
     _NETRC_MACHINE = 'niconico'
     # Determine whether the downloader used authentication to download video
     _AUTHENTICATED = False
@@ -76,8 +76,7 @@ class NiconicoIE(InfoExtractor):
         return True
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = self._match_id(url)
 
         # Get video webpage. We are not actually interested in it, but need
         # the cookies in order to be able to download the info webpage
index 9c01eb0af8067948878581a0a30d9be326f990e9..557dffa46846ff8a4c94f6bb102a186fa1ce5eb8 100644 (file)
@@ -219,7 +219,8 @@ class NPOLiveIE(NPOBaseIE):
         if streams:
             for stream in streams:
                 stream_type = stream.get('type').lower()
-                if stream_type == 'ss':
+                # smooth streaming is not supported
+                if stream_type in ['ss', 'ms']:
                     continue
                 stream_info = self._download_json(
                     'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
@@ -242,6 +243,7 @@ class NPOLiveIE(NPOBaseIE):
                 else:
                     formats.append({
                         'url': stream_url,
+                        'preference': -10,
                     })
 
         self._sort_formats(formats)
index 1e4cfa2e7c8c5e3ae05c7d5fbc11242a334a5322..bff36f9d3f24cad293144d8e216faf5eeefed92c 100644 (file)
@@ -149,9 +149,6 @@ class NRKTVIE(InfoExtractor):
         }
     ]
 
-    def _seconds2str(self, s):
-        return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000)
-
     def _debug_print(self, txt):
         if self._downloader.params.get('verbose', False):
             self.to_screen('[debug] %s' % txt)
@@ -168,8 +165,8 @@ class NRKTVIE(InfoExtractor):
         for pos, p in enumerate(ps):
             begin = parse_duration(p.get('begin'))
             duration = parse_duration(p.get('dur'))
-            starttime = self._seconds2str(begin)
-            endtime = self._seconds2str(begin + duration)
+            starttime = self._subtitles_timecode(begin)
+            endtime = self._subtitles_timecode(begin + duration)
             srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
         return {lang: [
             {'ext': 'ttml', 'url': url},
index 4e293392b3d39b46ad1612d884068a2dbfaeef23..ca1a5bb3cd520fd9add9f195c89d8ac13619467b 100644 (file)
@@ -11,6 +11,11 @@ from ..utils import (
     HEADRequest,
     unified_strdate,
     ExtractorError,
+    strip_jsonp,
+    int_or_none,
+    float_or_none,
+    determine_ext,
+    remove_end,
 )
 
 
@@ -197,3 +202,92 @@ class ORFFM4IE(InfoExtractor):
             'description': data['subtitle'],
             'entries': entries
         }
+
+
+class ORFIPTVIE(InfoExtractor):
+    IE_NAME = 'orf:iptv'
+    IE_DESC = 'iptv.ORF.at'
+    _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://iptv.orf.at/stories/2267952',
+        'md5': '26ffa4bab6dbce1eee78bbc7021016cd',
+        'info_dict': {
+            'id': '339775',
+            'ext': 'flv',
+            'title': 'Kreml-Kritiker Nawalny wieder frei',
+            'description': 'md5:6f24e7f546d364dacd0e616a9e409236',
+            'duration': 84.729,
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'upload_date': '20150306',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Resolve a story page to its video and return the info dict."""
+        story_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://iptv.orf.at/stories/%s' % story_id, story_id)
+
+        # The story page only references the video by a numeric id
+        video_id = self._search_regex(
+            r'data-video(?:id)?="(\d+)"', webpage, 'video id')
+
+        api_url = 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id
+        data = self._download_json(api_url, video_id)[0]
+
+        duration = float_or_none(data['duration'], 1000)
+
+        source = data['sources']['default']
+        load_balancer_url = source['loadBalancerUrl']
+        # Metadata of the default source; copied into the direct rtmp format
+        source_fields = {
+            'abr': int_or_none(source.get('audioBitrate')),
+            'vbr': int_or_none(source.get('bitrate')),
+            'fps': int_or_none(source.get('videoFps')),
+            'width': int_or_none(source.get('videoWidth')),
+            'height': int_or_none(source.get('videoHeight')),
+        }
+        thumbnail = source.get('preview')
+
+        rendition = self._download_json(
+            load_balancer_url, video_id, transform_source=strip_jsonp)
+
+        formats = []
+        for format_id, format_url in rendition['redirect'].items():
+            if format_id == 'rtmp':
+                rtmp_format = dict(source_fields)
+                rtmp_format['url'] = format_url
+                rtmp_format['format_id'] = format_id
+                formats.append(rtmp_format)
+                continue
+            # Everything else is dispatched on the URL extension;
+            # unknown extensions are ignored
+            ext = determine_ext(format_url)
+            if ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    format_url, video_id, f4m_id=format_id))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=format_id))
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': remove_end(self._og_search_title(webpage), ' - iptv.ORF.at'),
+            'description': self._og_search_description(webpage),
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'upload_date': unified_strdate(self._html_search_meta(
+                'dc.date', webpage, 'upload date')),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py
new file mode 100644 (file)
index 0000000..abde34b
--- /dev/null
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    xpath_text,
+    qualities,
+)
+
+
+class PladformIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:
+                                out\.pladform\.ru/player|
+                                static\.pladform\.ru/player\.swf
+                            )
+                            \?.*\bvideoid=|
+                            video\.pladform\.ru/catalog/video/videoid/
+                        )
+                        (?P<id>\d+)
+                    '''
+    _TESTS = [{
+        # http://muz-tv.ru/kinozal/view/7400/
+        'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
+        'md5': '61f37b575dd27f1bb2e1854777fe31f4',
+        'info_dict': {
+            'id': '100183293',
+            'ext': 'mp4',
+            'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+            'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 694,
+            'age_limit': 0,
+        },
+    }, {
+        'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
+        'only_matching': True,
+    }, {
+        'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Combine the getVideo XML with the catalog page into one info dict."""
+        video_id = self._match_id(url)
+
+        video = self._download_xml(
+            'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
+            video_id)
+
+        # A root element named <error> carries the failure message as its text
+        if video.tag == 'error':
+            message = '%s returned error: %s' % (self.IE_NAME, video.text)
+            raise ExtractorError(message, expected=True)
+
+        quality = qualities(('ld', 'sd', 'hd'))
+
+        formats = []
+        for src in video.findall('./src'):
+            quality_label = src.get('quality')
+            formats.append({
+                'url': src.text,
+                'format_id': quality_label,
+                'quality': quality(quality_label),
+            })
+        self._sort_formats(formats)
+
+        webpage = self._download_webpage(
+            'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
+            video_id)
+
+        # Title and thumbnail come from the page, falling back to the XML
+        title = self._og_search_title(webpage, fatal=False)
+        if not title:
+            title = xpath_text(video, './/title', 'title', fatal=True)
+        description = self._search_regex(
+            r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+        if not thumbnail:
+            thumbnail = xpath_text(video, './/cover', 'cover')
+
+        duration_text = xpath_text(video, './/time', 'duration')
+        age_limit_text = xpath_text(video, './/age18', 'age limit')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': int_or_none(duration_text),
+            'age_limit': int_or_none(age_limit_text),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py
new file mode 100644 (file)
index 0000000..01cc3d9
--- /dev/null
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+from ..utils import ExtractorError
+
+
+class PrimeShareTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'
+
+    _TEST = {
+        'url': 'http://primeshare.tv/download/238790B611',
+        'md5': 'b92d9bf5461137c36228009f31533fbc',
+        'info_dict': {
+            'id': '238790B611',
+            'ext': 'mp4',
+            'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Post the page's hidden form back after its wait time and return the video info.
+
+        Raises ExtractorError (expected) when the page reports that the
+        file does not exist.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        if '>File not exist<' in webpage:
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+        # Collect the name/value pairs of the hidden <input> fields
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            (?:id="[^"]+"\s+)?
+            value="([^"]*)"
+            ''', webpage))
+
+        headers = {
+            'Referer': url,
+            'Content-Type': 'application/x-www-form-urlencoded',
+        }
+
+        # Sleep for the page's cWaitTime (7 when it cannot be found)
+        # plus one extra second before submitting the form
+        wait_time = int(self._search_regex(
+            r'var\s+cWaitTime\s*=\s*(\d+)',
+            webpage, 'wait time', default=7)) + 1
+        self._sleep(wait_time, video_id)
+
+        # urlencode() returns text on Python 3, but request data must be
+        # bytes there; the encoded form is plain ASCII on both versions
+        post_data = compat_urllib_parse.urlencode(fields).encode('ascii')
+        req = compat_urllib_request.Request(url, post_data, headers)
+        video_page = self._download_webpage(
+            req, video_id, 'Downloading video page')
+
+        video_url = self._search_regex(
+            r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
+            video_page, 'video url')
+
+        title = self._html_search_regex(
+            r'<h1>Watch\s*(?:&nbsp;)?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?:&nbsp;)?\s*<strong>',
+            video_page, 'title')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'ext': 'mp4',
+        }
index b42442d127c13e69fc81cb27e71cf117d2cb96b2..13f07107753e0945e4d9e203a2bc6dd8814d4868 100644 (file)
@@ -127,6 +127,47 @@ class RTVEALaCartaIE(InfoExtractor):
             for s in subs)
 
 
+class RTVEInfantilIE(InfoExtractor):
+    IE_NAME = 'rtve.es:infantil'
+    IE_DESC = 'RTVE infantil'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
+
+    _TESTS = [{
+        'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
+        'md5': '915319587b33720b8e0357caaa6617e6',
+        'info_dict': {
+            'id': '3040283',
+            'ext': 'mp4',
+            'title': 'Maneras de vivir',
+            'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
+            'duration': 357.958,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for a single RTVE infantil video page."""
+        video_id = self._match_id(url)
+        config_url = 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id
+        info = self._download_json(config_url, video_id)['page']['items'][0]
+
+        # The page embeds a separate player id that is needed for the next request
+        webpage = self._download_webpage(url, video_id)
+        vidplayer_id = self._search_regex(
+            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
+
+        # The response of the thumbnail endpoint is passed to _decrypt_url,
+        # which yields the media URL
+        png = self._download_webpage(
+            'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id,
+            video_id, 'Downloading url information')
+        video_url = _decrypt_url(png)
+
+        result = {'id': video_id, 'ext': 'mp4'}
+        result['title'] = info['title']
+        result['url'] = video_url
+        result['thumbnail'] = info.get('image')
+        result['duration'] = float_or_none(info.get('duration'), scale=1000)
+        return result
+
+
 class RTVELiveIE(InfoExtractor):
     IE_NAME = 'rtve.es:live'
     IE_DESC = 'RTVE.es live streams'
diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/ssa.py
new file mode 100644 (file)
index 0000000..13101c7
--- /dev/null
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    unescapeHTML,
+    parse_duration,
+)
+
+
+class SSAIE(InfoExtractor):
+    _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://ssa.nls.uk/film/3561',
+        'info_dict': {
+            'id': '3561',
+            'ext': 'flv',
+            'title': 'SHETLAND WOOL',
+            'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
+            'duration': 900,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for a film page: rtmp streamer, play path and metadata."""
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        # Allow optional whitespace after the comma, like the 'file' and
+        # 'image' patterns below do
+        streamer = self._search_regex(
+            r"'streamer'\s*,\s*'(rtmp[^']+)'", webpage, 'streamer')
+        # The play path is the file name without its extension
+        play_path = self._search_regex(
+            r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
+
+        def search_field(field_name, fatal=False):
+            # Look up the content of a "<field_name>:" title/content span pair;
+            # the field's own name is used in warnings and errors
+            return self._search_regex(
+                r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
+                webpage, field_name.lower(), fatal=fatal)
+
+        title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
+        description = unescapeHTML(search_field('Description'))
+        duration = parse_duration(search_field('Running time'))
+        thumbnail = self._search_regex(
+            r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
+
+        return {
+            'id': video_id,
+            'url': streamer,
+            'play_path': play_path,
+            'ext': 'flv',
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }
index 5793dbc1085a86fdf573a432805be129dc62de94..7cb06f351e5b388142b00b51aeba69a1ecfef250 100644 (file)
@@ -53,10 +53,10 @@ class TeamcocoIE(InfoExtractor):
         embed = self._download_webpage(
             embed_url, video_id, 'Downloading embed page')
 
-        encoded_data = self._search_regex(
-            r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
+        player_data = self._parse_json(self._search_regex(
+            r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id)
         data = self._parse_json(
-            base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
+            base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
 
         formats = []
         get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
index 8af1361473ac338d45c0bc0f512953c684b9681b..cbdaf9c7ab0d8c878011b10f8c59d71c851007c5 100644 (file)
@@ -85,6 +85,14 @@ class TwitchBaseIE(InfoExtractor):
             raise ExtractorError(
                 'Unable to login: %s' % m.group('msg').strip(), expected=True)
 
+    def _prefer_source(self, formats):
+        """Give the first 'Source' format a preference of 10, then sort the formats."""
+        for fmt in formats:
+            if fmt['format_id'] == 'Source':
+                fmt['preference'] = 10
+                break
+        # Sorting happens whether or not a Source stream was present
+        self._sort_formats(formats)
+
 
 class TwitchItemBaseIE(TwitchBaseIE):
     def _download_info(self, item, item_id):
@@ -209,6 +217,7 @@ class TwitchVodIE(TwitchItemBaseIE):
             '%s/vod/%s?nauth=%s&nauthsig=%s'
             % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
             item_id, 'mp4')
+        self._prefer_source(formats)
         info['formats'] = formats
         return info
 
@@ -349,21 +358,14 @@ class TwitchStreamIE(TwitchBaseIE):
             'p': random.randint(1000000, 10000000),
             'player': 'twitchweb',
             'segment_preference': '4',
-            'sig': access_token['sig'],
-            'token': access_token['token'],
+            'sig': access_token['sig'].encode('utf-8'),
+            'token': access_token['token'].encode('utf-8'),
         }
-
         formats = self._extract_m3u8_formats(
             '%s/api/channel/hls/%s.m3u8?%s'
-            % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
+            % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
             channel_id, 'mp4')
-
-        # prefer the 'source' stream, the others are limited to 30 fps
-        def _sort_source(f):
-            if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
-                return 1
-            return 0
-        formats = sorted(formats, key=_sort_source)
+        self._prefer_source(formats)
 
         view_count = stream.get('viewers')
         timestamp = parse_iso8601(stream.get('created_at'))
index 5c89824c164272358923e245e60a4f86466287c4..bd953fb4cc212f50dce2cac624c9391a14e82898 100644 (file)
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -28,12 +26,11 @@ class VidmeIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')
+        video_url = self._html_search_regex(
+            r'<source src="([^"]+)"', webpage, 'video URL')
 
         title = self._og_search_title(webpage)
         description = self._og_search_description(webpage, default='')
@@ -44,13 +41,10 @@ class VidmeIE(InfoExtractor):
         duration = float_or_none(self._html_search_regex(
             r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
         view_count = str_to_int(self._html_search_regex(
-            r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
+            r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
         like_count = str_to_int(self._html_search_regex(
             r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
             webpage, 'like count', fatal=False))
-        comment_count = str_to_int(self._html_search_regex(
-            r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
-            webpage, 'comment count', fatal=False))
 
         return {
             'id': video_id,
@@ -64,5 +58,4 @@ class VidmeIE(InfoExtractor):
             'duration': duration,
             'view_count': view_count,
             'like_count': like_count,
-            'comment_count': comment_count,
         }
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
new file mode 100644 (file)
index 0000000..1742e66
--- /dev/null
@@ -0,0 +1,129 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+
+
+class ViewsterIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P<id>\d+-\d+-\d+)'
+    _TESTS = [{
+        # movielink, paymethod=fre
+        'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/',
+        'playlist': [{
+            'md5': '8f9d94b282d80c42b378dffdbb11caf3',
+            'info_dict': {
+                'id': '1293-19341-000-movie',
+                'ext': 'flv',
+                'title': "'Hout' (Wood) - Movie",
+            },
+        }],
+        'info_dict': {
+            'id': '1293-19341-000',
+            'title': "'Hout' (Wood)",
+            'description': 'md5:925733185a9242ef96f436937683f33b',
+        }
+    }, {
+        # movielink, paymethod=adv
+        'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
+        'playlist': [{
+            'md5': '77a005453ca7396cbe3d35c9bea30aef',
+            'info_dict': {
+                'id': '1140-11855-000-movie',
+                'ext': 'flv',
+                'title': "THE LISTENING PROJECT - Movie",
+            },
+        }],
+        'info_dict': {
+            'id': '1140-11855-000',
+            'title': "THE LISTENING PROJECT",
+            'description': 'md5:714421ae9957e112e672551094bf3b08',
+        }
+    }, {
+        # direct links, no movielink
+        'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/',
+        'playlist': [{
+            'md5': '0307b7eac6bfb21ab0577a71f6eebd8f',
+            'info_dict': {
+                'id': '1198-56411-000-trailer',
+                'ext': 'mp4',
+                'title': "Sinister - Trailer",
+            },
+        }, {
+            'md5': '80b9ee3ad69fb368f104cb5d9732ae95',
+            'info_dict': {
+                'id': '1198-56411-000-behind-scenes',
+                'ext': 'mp4',
+                'title': "Sinister - Behind Scenes",
+            },
+        }, {
+            'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5',
+            'info_dict': {
+                'id': '1198-56411-000-scene-from-movie',
+                'ext': 'mp4',
+                'title': "Sinister - Scene from movie",
+            },
+        }],
+        'info_dict': {
+            'id': '1198-56411-000',
+            'title': "Sinister",
+            'description': 'md5:014c40b0488848de9683566a42e33372',
+        }
+    }]
+
+    _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
+
+    def _real_extract(self, url):
+        """Return a playlist with one entry per playable clip of the movie.
+
+        A clip is resolved through the movielink API when it carries a
+        'link_request', and through its direct 'clip_data' URL when it has
+        one; the direct URL wins if both are available. Clips that yield
+        neither are left out.
+        """
+        video_id = self._match_id(url)
+
+        request = compat_urllib_request.Request(
+            'http://api.live.viewster.com/api/v1/movie/%s' % video_id)
+        request.add_header('Accept', self._ACCEPT_HEADER)
+
+        movie = self._download_json(
+            request, video_id, 'Downloading movie metadata JSON')
+
+        title = movie.get('title') or movie['original_title']
+        description = movie.get('synopsis')
+        thumbnail = movie.get('large_artwork') or movie.get('artwork')
+
+        entries = []
+        for clip in movie['play_list']:
+            entry = None
+
+            # movielink api
+            link_request = clip.get('link_request')
+            if link_request:
+                request = compat_urllib_request.Request(
+                    'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s&currency=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s'
+                    % link_request)
+                request.add_header('Accept', self._ACCEPT_HEADER)
+
+                movie_link = self._download_json(
+                    request, video_id, 'Downloading movie link JSON', fatal=False)
+
+                # The request is best-effort, so a response without a URL
+                # is skipped instead of aborting the whole playlist
+                movie_link_url = movie_link.get('url') if movie_link else None
+                if movie_link_url:
+                    formats = self._extract_f4m_formats(
+                        movie_link_url + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id)
+                    self._sort_formats(formats)
+                    entry = {
+                        'formats': formats,
+                    }
+
+            # direct link; 'clip_data' may be missing or null in the JSON
+            clip_url = (clip.get('clip_data') or {}).get('url')
+            if clip_url:
+                entry = {
+                    'url': clip_url,
+                    'ext': 'mp4',
+                }
+
+            if entry:
+                entry.update({
+                    'id': '%s-%s' % (video_id, clip['canonical_title']),
+                    'title': '%s - %s' % (title, clip['title']),
+                })
+                entries.append(entry)
+
+        playlist = self.playlist_result(entries, video_id, title, description)
+        playlist['thumbnail'] = thumbnail
+        return playlist
index 8f540f5780570d06fa10e695555026c537b7c0f0..bd09652cd96340155cc084f6814df4fbecd6f707 100644 (file)
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import json
 import re
 import itertools
-import hashlib
 
 from .common import InfoExtractor
 from ..compat import (
@@ -20,6 +19,7 @@ from ..utils import (
     RegexNotFoundError,
     smuggle_url,
     std_headers,
+    unified_strdate,
     unsmuggle_url,
     urlencode_postdata,
 )
@@ -38,7 +38,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         self.report_login()
         login_url = 'https://vimeo.com/log_in'
         webpage = self._download_webpage(login_url, None, False)
-        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+        token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
         data = urlencode_postdata({
             'email': username,
             'password': password,
@@ -140,6 +140,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                 'description': 'md5:8678b246399b070816b12313e8b4eb5c',
                 'uploader_id': 'atencio',
                 'uploader': 'Peter Atencio',
+                'upload_date': '20130927',
                 'duration': 187,
             },
         },
@@ -176,17 +177,15 @@ class VimeoIE(VimeoBaseInfoExtractor):
         password = self._downloader.params.get('videopassword', None)
         if password is None:
             raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
-        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
-        data = compat_urllib_parse.urlencode({
+        token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
+        data = urlencode_postdata({
             'password': password,
             'token': token,
         })
-        # I didn't manage to use the password with https
-        if url.startswith('https'):
-            pass_url = url.replace('https', 'http')
-        else:
-            pass_url = url
-        password_request = compat_urllib_request.Request(pass_url + '/password', data)
+        if url.startswith('http://'):
+            # vimeo only supports https now, but the user can give an http url
+            url = url.replace('http://', 'https://')
+        password_request = compat_urllib_request.Request(url + '/password', data)
         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         password_request.add_header('Cookie', 'xsrft=%s' % token)
         return self._download_webpage(
@@ -223,12 +222,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
         video_id = mobj.group('id')
         orig_url = url
         if mobj.group('pro') or mobj.group('player'):
-            url = 'http://player.vimeo.com/video/' + video_id
-
-        password = self._downloader.params.get('videopassword', None)
-        if password:
-            headers['Cookie'] = '%s_password=%s' % (
-                video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
+            url = 'https://player.vimeo.com/video/' + video_id
 
         # Retrieve video webpage to extract further information
         request = compat_urllib_request.Request(url, None, headers)
@@ -323,9 +317,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
 
         # Extract upload date
         video_upload_date = None
-        mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage)
+        mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
         if mobj is not None:
-            video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
+            video_upload_date = unified_strdate(mobj.group(1))
 
         try:
             view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
@@ -379,7 +373,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             for tt in text_tracks:
                 subtitles[tt['lang']] = [{
                     'ext': 'vtt',
-                    'url': 'http://vimeo.com' + tt['url'],
+                    'url': 'https://vimeo.com' + tt['url'],
                 }]
 
         return {
@@ -402,11 +396,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
 
 class VimeoChannelIE(InfoExtractor):
     IE_NAME = 'vimeo:channel'
-    _VALID_URL = r'https?://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
+    _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
     _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
     _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
     _TESTS = [{
-        'url': 'http://vimeo.com/channels/tributes',
+        'url': 'https://vimeo.com/channels/tributes',
         'info_dict': {
             'id': 'tributes',
             'title': 'Vimeo Tributes',
@@ -435,10 +429,10 @@ class VimeoChannelIE(InfoExtractor):
             name="([^"]+)"\s+
             value="([^"]*)"
             ''', login_form))
-        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+        token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
         fields['token'] = token
         fields['password'] = password
-        post = compat_urllib_parse.urlencode(fields)
+        post = urlencode_postdata(fields)
         password_path = self._search_regex(
             r'action="([^"]+)"', login_form, 'password URL')
         password_url = compat_urlparse.urljoin(page_url, password_path)
@@ -465,7 +459,7 @@ class VimeoChannelIE(InfoExtractor):
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                 break
 
-        entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
+        entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
                    for video_id in video_ids]
         return {'_type': 'playlist',
                 'id': list_id,
@@ -476,15 +470,15 @@ class VimeoChannelIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         channel_id = mobj.group('id')
-        return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id)
+        return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id)
 
 
 class VimeoUserIE(VimeoChannelIE):
     IE_NAME = 'vimeo:user'
-    _VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
+    _VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
     _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
     _TESTS = [{
-        'url': 'http://vimeo.com/nkistudio/videos',
+        'url': 'https://vimeo.com/nkistudio/videos',
         'info_dict': {
             'title': 'Nki',
             'id': 'nkistudio',
@@ -495,15 +489,15 @@ class VimeoUserIE(VimeoChannelIE):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         name = mobj.group('name')
-        return self._extract_videos(name, 'http://vimeo.com/%s' % name)
+        return self._extract_videos(name, 'https://vimeo.com/%s' % name)
 
 
 class VimeoAlbumIE(VimeoChannelIE):
     IE_NAME = 'vimeo:album'
-    _VALID_URL = r'https?://vimeo\.com/album/(?P<id>\d+)'
+    _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
     _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
     _TESTS = [{
-        'url': 'http://vimeo.com/album/2632481',
+        'url': 'https://vimeo.com/album/2632481',
         'info_dict': {
             'id': '2632481',
             'title': 'Staff Favorites: November 2013',
@@ -527,14 +521,14 @@ class VimeoAlbumIE(VimeoChannelIE):
 
     def _real_extract(self, url):
         album_id = self._match_id(url)
-        return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
+        return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
 
 
 class VimeoGroupsIE(VimeoAlbumIE):
     IE_NAME = 'vimeo:group'
-    _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'
+    _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)'
     _TESTS = [{
-        'url': 'http://vimeo.com/groups/rolexawards',
+        'url': 'https://vimeo.com/groups/rolexawards',
         'info_dict': {
             'id': 'rolexawards',
             'title': 'Rolex Awards for Enterprise',
@@ -548,13 +542,13 @@ class VimeoGroupsIE(VimeoAlbumIE):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         name = mobj.group('name')
-        return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)
+        return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name)
 
 
 class VimeoReviewIE(InfoExtractor):
     IE_NAME = 'vimeo:review'
     IE_DESC = 'Review pages on vimeo'
-    _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
+    _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
         'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -566,7 +560,7 @@ class VimeoReviewIE(InfoExtractor):
         }
     }, {
         'note': 'video player needs Referer',
-        'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
+        'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
         'md5': '6295fdab8f4bf6a002d058b2c6dce276',
         'info_dict': {
             'id': '91613211',
@@ -588,11 +582,11 @@ class VimeoReviewIE(InfoExtractor):
 class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
     IE_NAME = 'vimeo:watchlater'
     IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
-    _VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
+    _VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater'
     _LOGIN_REQUIRED = True
     _TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
     _TESTS = [{
-        'url': 'http://vimeo.com/home/watchlater',
+        'url': 'https://vimeo.com/home/watchlater',
         'only_matching': True,
     }]
 
@@ -612,7 +606,7 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
 
 
 class VimeoLikesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
+    _VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
     IE_NAME = 'vimeo:likes'
     IE_DESC = 'Vimeo user likes'
     _TEST = {
@@ -640,8 +634,8 @@ class VimeoLikesIE(InfoExtractor):
         description = self._html_search_meta('description', webpage)
 
         def _get_page(idx):
-            page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % (
-                self.http_scheme(), user_id, idx + 1)
+            page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % (
+                user_id, idx + 1)
             webpage = self._download_webpage(
                 page_url, user_id,
                 note='Downloading page %d/%d' % (idx + 1, page_count))
index b294767c5b6bc5a7ec918e9108cc4cff1a5fac40..19f8762ae57e814a11f868be1a008e21bbaf8efc 100644 (file)
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
 from ..utils import (
     float_or_none,
     month_by_abbreviation,
+    ExtractorError,
 )
 
 
@@ -28,23 +29,45 @@ class YamIE(InfoExtractor):
         }
     }, {
         # An external video hosted on YouTube
-        'url': 'http://mymedia.yam.com/m/3598173',
-        'md5': '0238ceec479c654e8c2f1223755bf3e9',
+        'url': 'http://mymedia.yam.com/m/3599430',
+        'md5': '03127cf10d8f35d120a9e8e52e3b17c6',
         'info_dict': {
-            'id': 'pJ2Deys283c',
+            'id': 'CNpEoQlrIgA',
             'ext': 'mp4',
-            'upload_date': '20150202',
+            'upload_date': '20150306',
             'uploader': '新莊社大瑜伽社',
-            'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff',
+            'description': 'md5:11e2e405311633ace874f2e6226c8b17',
             'uploader_id': '2323agoy',
-            'title': '外婆的澎湖灣KTV-潘安邦',
-        }
+            'title': '20090412陽明山二子坪-1',
+        },
+        'skip': 'Video does not exist',
+    }, {
+        'url': 'http://mymedia.yam.com/m/3598173',
+        'info_dict': {
+            'id': '3598173',
+            'ext': 'mp4',
+        },
+        'skip': 'cause Yam system error',
+    }, {
+        'url': 'http://mymedia.yam.com/m/3599437',
+        'info_dict': {
+            'id': '3599437',
+            'ext': 'mp4',
+        },
+        'skip': 'invalid YouTube URL',
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         page = self._download_webpage(url, video_id)
 
+        # Check for errors
+        system_msg = self._html_search_regex(
+            r'系統訊息(?:<br>|\n|\r)*([^<>]+)<br>', page, 'system message',
+            default=None)
+        if system_msg:
+            raise ExtractorError(system_msg, expected=True)
+
         # Is it hosted externally on YouTube?
         youtube_url = self._html_search_regex(
             r'<embed src="(http://www.youtube.com/[^"]+)"',
diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py
new file mode 100644 (file)
index 0000000..f4c0f57
--- /dev/null
@@ -0,0 +1,127 @@
+# coding=utf-8
+from __future__ import unicode_literals
+
+import re
+import hashlib
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    float_or_none,
+)
+
+
+class YandexMusicBaseIE(InfoExtractor):
+    def _get_track_url(self, storage_dir, track_id):
+        data = self._download_json(
+            'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
+            % storage_dir,
+            track_id, 'Downloading track location JSON')
+
+        key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest()
+        storage = storage_dir.split('.')
+
+        return ('http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default'
+                % (data['host'], key, data['ts'] + data['path'], storage[1]))
+
+    def _get_track_info(self, track):
+        return {
+            'id': track['id'],
+            'ext': 'mp3',
+            'url': self._get_track_url(track['storageDir'], track['id']),
+            'title': '%s - %s' % (track['artists'][0]['name'], track['title']),
+            'filesize': int_or_none(track.get('fileSize')),
+            'duration': float_or_none(track.get('durationMs'), 1000),
+        }
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
+    IE_NAME = 'yandexmusic:track'
+    IE_DESC = 'Яндекс.Музыка - Трек'
+    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://music.yandex.ru/album/540508/track/4878838',
+        'md5': 'f496818aa2f60b6c0062980d2e00dc20',
+        'info_dict': {
+            'id': '4878838',
+            'ext': 'mp3',
+            'title': 'Carlo Ambrosio - Gypsy Eyes 1',
+            'filesize': 4628061,
+            'duration': 193.04,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        album_id, track_id = mobj.group('album_id'), mobj.group('id')
+
+        track = self._download_json(
+            'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
+            track_id, 'Downloading track JSON')['track']
+
+        return self._get_track_info(track)
+
+
+class YandexMusicAlbumIE(YandexMusicBaseIE):
+    IE_NAME = 'yandexmusic:album'
+    IE_DESC = 'Яндекс.Музыка - Альбом'
+    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
+
+    _TEST = {
+        'url': 'http://music.yandex.ru/album/540508',
+        'info_dict': {
+            'id': '540508',
+            'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
+        },
+        'playlist_count': 50,
+    }
+
+    def _real_extract(self, url):
+        album_id = self._match_id(url)
+
+        album = self._download_json(
+            'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
+            album_id, 'Downloading album JSON')
+
+        entries = [self._get_track_info(track) for track in album['volumes'][0]]
+
+        title = '%s - %s' % (album['artists'][0]['name'], album['title'])
+        year = album.get('year')
+        if year:
+            title += ' (%s)' % year
+
+        return self.playlist_result(entries, compat_str(album['id']), title)
+
+
+class YandexMusicPlaylistIE(YandexMusicBaseIE):
+    IE_NAME = 'yandexmusic:playlist'
+    IE_DESC = 'Яндекс.Музыка - Плейлист'
+    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
+        'info_dict': {
+            'id': '1245',
+            'title': 'Что слушают Enter Shikari',
+            'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
+        },
+        'playlist_count': 6,
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        playlist = self._parse_json(
+            self._search_regex(
+                r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
+            playlist_id)['pageData']['playlist']
+
+        entries = [self._get_track_info(track) for track in playlist['tracks']]
+
+        return self.playlist_result(
+            entries, compat_str(playlist_id),
+            playlist['title'], playlist.get('description'))
index 3690f8021267b30171be4f2e7a019133aaeaaca9..27c8c4453773974f52d02ecba58f092221c6e118 100644 (file)
@@ -1532,7 +1532,7 @@ class YoutubeSearchURLIE(InfoExtractor):
 
         webpage = self._download_webpage(url, query)
         result_code = self._search_regex(
-            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')
+            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
 
         part_codes = re.findall(
             r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
index a4ca8adc42ba9222da13c581c08ae8e7af6794fe..4e6e47d6fdc430f4071c1470f25a1b7377ad7845 100644 (file)
@@ -563,7 +563,7 @@ def parseOpts(overrideArguments=None):
         action='store_true', dest='verbose', default=False,
         help='print various debugging information')
     verbosity.add_option(
-        '--dump-intermediate-pages',
+        '--dump-pages', '--dump-intermediate-pages',
         action='store_true', dest='dump_intermediate_pages', default=False,
         help='print downloaded pages to debug problems (very verbose)')
     verbosity.add_option(
@@ -735,6 +735,15 @@ def parseOpts(overrideArguments=None):
         '--add-metadata',
         action='store_true', dest='addmetadata', default=False,
         help='write metadata to the video file')
+    postproc.add_option(
+        '--metadata-from-title',
+        metavar='FORMAT', dest='metafromtitle',
+        help='parse additional metadata like song title / artist from the video title. '
+             'The format syntax is the same as --output, '
+             'the parsed parameters replace existing values. '
+             'Additional templates: %(album), %(artist). '
+             'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
+             '"Coldplay - Paradise"')
     postproc.add_option(
         '--xattrs',
         action='store_true', dest='xattrs', default=False,
index 708df3dd493ca97e6d1649d572ac68a0d6847464..f39acadce8ac6ab0603e2355af8397edd93249ac 100644 (file)
@@ -15,6 +15,7 @@ from .ffmpeg import (
 )
 from .xattrpp import XAttrMetadataPP
 from .execafterdownload import ExecAfterDownloadPP
+from .metadatafromtitle import MetadataFromTitlePP
 
 
 def get_postprocessor(key):
@@ -34,5 +35,6 @@ __all__ = [
     'FFmpegPostProcessor',
     'FFmpegSubtitlesConvertorPP',
     'FFmpegVideoConvertorPP',
+    'MetadataFromTitlePP',
     'XAttrMetadataPP',
 ]
index 30094c2f37f767f937052306ddf3967279858a01..b6f51cfd5e1ed5cebb4981dfbf3152e67ed33d1d 100644 (file)
@@ -545,7 +545,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             metadata['title'] = info['title']
         if info.get('upload_date') is not None:
             metadata['date'] = info['upload_date']
-        if info.get('uploader') is not None:
+        if info.get('artist') is not None:
+            metadata['artist'] = info['artist']
+        elif info.get('uploader') is not None:
             metadata['artist'] = info['uploader']
         elif info.get('uploader_id') is not None:
             metadata['artist'] = info['uploader_id']
@@ -554,6 +556,8 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             metadata['comment'] = info['description']
         if info.get('webpage_url') is not None:
             metadata['purl'] = info['webpage_url']
+        if info.get('album') is not None:
+            metadata['album'] = info['album']
 
         if not metadata:
             self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py
new file mode 100644 (file)
index 0000000..5019433
--- /dev/null
@@ -0,0 +1,47 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import PostProcessor
+from ..utils import PostProcessingError
+
+
+class MetadataFromTitlePPError(PostProcessingError):
+    pass
+
+
+class MetadataFromTitlePP(PostProcessor):
+    def __init__(self, downloader, titleformat):
+        super(MetadataFromTitlePP, self).__init__(downloader)
+        self._titleformat = titleformat
+        self._titleregex = self.format_to_regex(titleformat)
+
+    def format_to_regex(self, fmt):
+        """
+        Converts a string like
+           '%(title)s - %(artist)s'
+        to a regex like
+           '(?P<title>.+)\ \-\ (?P<artist>.+)'
+        """
+        lastpos = 0
+        regex = ""
+        # replace %(..)s with regex group and escape other string parts
+        for match in re.finditer(r'%\((\w+)\)s', fmt):
+            regex += re.escape(fmt[lastpos:match.start()])
+            regex += r'(?P<' + match.group(1) + '>.+)'
+            lastpos = match.end()
+        if lastpos < len(fmt):
+            regex += re.escape(fmt[lastpos:len(fmt)])
+        return regex
+
+    def run(self, info):
+        title = info['title']
+        match = re.match(self._titleregex, title)
+        if match is None:
+            raise MetadataFromTitlePPError('Could not interpret title of video as "%s"' % self._titleformat)
+        for attribute, value in match.groupdict().items():
+            value = match.group(attribute)
+            info[attribute] = value
+            self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)
+
+        return True, info
index ef14f9a367197164ad15fc0ab6d15f47ef2c9b59..e82e3998a7c30d8ae14c9561b51946df0cbebcff 100644 (file)
@@ -252,15 +252,12 @@ def sanitize_open(filename, open_mode):
             raise
 
         # In case of error, try to remove win32 forbidden chars
-        alt_filename = os.path.join(
-            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
-            for path_part in os.path.split(filename)
-        )
+        alt_filename = sanitize_path(filename)
         if alt_filename == filename:
             raise
         else:
             # An exception here should be caught in the caller
-            stream = open(encodeFilename(filename), open_mode)
+            stream = open(encodeFilename(alt_filename), open_mode)
             return (stream, alt_filename)
 
 
@@ -311,6 +308,24 @@ def sanitize_filename(s, restricted=False, is_id=False):
     return result
 
 
+def sanitize_path(s):
+    """Sanitizes and normalizes path on Windows"""
+    if sys.platform != 'win32':
+        return s
+    drive, _ = os.path.splitdrive(s)
+    unc, _ = os.path.splitunc(s)
+    unc_or_drive = unc or drive
+    norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
+    if unc_or_drive:
+        norm_path.pop(0)
+    sanitized_path = [
+        path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
+        for path_part in norm_path]
+    if unc_or_drive:
+        sanitized_path.insert(0, unc_or_drive + os.path.sep)
+    return os.path.join(*sanitized_path)
+
+
 def orderedSet(iterable):
     """ Remove all duplicates from the input iterable """
     res = []
index 252933993d49afe9ff1b4039bb7e8a33986daaaa..7ed07c375a9a898d45a145722a2ebd6ad93a9285 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.03.03.1'
+__version__ = '2015.03.15'