added YouJizz extractor
authorJeff Crouse <jefftimesten@gmail.com>
Sun, 16 Dec 2012 05:26:27 +0000 (00:26 -0500)
committerJeff Crouse <jefftimesten@gmail.com>
Sun, 16 Dec 2012 05:26:27 +0000 (00:26 -0500)
README.md
youtube_dl/InfoExtractors.py
youtube_dl/__init__.py

index 1a96bd98d858ef11a790b5648157c3da1cb642db..0ac75f5d7f6905c59de466bf1ba46e5c549da3d0 100644 (file)
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ which means you can modify it, redistribute it or use it however you like.
     --list-extractors        List all supported extractors and the URLs they
                              would handle
 
     --list-extractors        List all supported extractors and the URLs they
                              would handle
 
-## Video Selection:
+  Video Selection:
     --playlist-start NUMBER  playlist video to start at (default is 1)
     --playlist-end NUMBER    playlist video to end at (default is last)
     --match-title REGEX      download only matching titles (regex or caseless
     --playlist-start NUMBER  playlist video to start at (default is 1)
     --playlist-end NUMBER    playlist video to end at (default is last)
     --match-title REGEX      download only matching titles (regex or caseless
@@ -34,7 +34,7 @@ which means you can modify it, redistribute it or use it however you like.
                              caseless sub-string)
     --max-downloads NUMBER   Abort after downloading NUMBER files
 
                              caseless sub-string)
     --max-downloads NUMBER   Abort after downloading NUMBER files
 
-## Filesystem Options:
+  Filesystem Options:
     -t, --title              use title in file name
     --id                     use video ID in file name
     -l, --literal            use literal title in file name
     -t, --title              use title in file name
     --id                     use video ID in file name
     -l, --literal            use literal title in file name
@@ -59,7 +59,7 @@ which means you can modify it, redistribute it or use it however you like.
     --write-description      write video description to a .description file
     --write-info-json        write video metadata to a .info.json file
 
     --write-description      write video description to a .description file
     --write-info-json        write video metadata to a .info.json file
 
-## Verbosity / Simulation Options:
+  Verbosity / Simulation Options:
     -q, --quiet              activates quiet mode
     -s, --simulate           do not download the video and do not write anything
                              to disk
     -q, --quiet              activates quiet mode
     -s, --simulate           do not download the video and do not write anything
                              to disk
@@ -74,7 +74,7 @@ which means you can modify it, redistribute it or use it however you like.
     --console-title          display progress in console titlebar
     -v, --verbose            print various debugging information
 
     --console-title          display progress in console titlebar
     -v, --verbose            print various debugging information
 
-## Video Format Options:
+  Video Format Options:
     -f, --format FORMAT      video format code
     --all-formats            download all available video formats
     --prefer-free-formats    prefer free video formats unless a specific one is
     -f, --format FORMAT      video format code
     --all-formats            download all available video formats
     --prefer-free-formats    prefer free video formats unless a specific one is
@@ -86,12 +86,12 @@ which means you can modify it, redistribute it or use it however you like.
     --srt-lang LANG          language of the closed captions to download
                              (optional) use IETF language tags like 'en'
 
     --srt-lang LANG          language of the closed captions to download
                              (optional) use IETF language tags like 'en'
 
-## Authentication Options:
+  Authentication Options:
     -u, --username USERNAME  account username
     -p, --password PASSWORD  account password
     -n, --netrc              use .netrc authentication data
 
     -u, --username USERNAME  account username
     -p, --password PASSWORD  account password
     -n, --netrc              use .netrc authentication data
 
-## Post-processing Options:
+  Post-processing Options:
     -x, --extract-audio      convert video files to audio-only files (requires
                              ffmpeg or avconv and ffprobe or avprobe)
     --audio-format FORMAT    "best", "aac", "vorbis", "mp3", "m4a", or "wav";
     -x, --extract-audio      convert video files to audio-only files (requires
                              ffmpeg or avconv and ffprobe or avprobe)
     --audio-format FORMAT    "best", "aac", "vorbis", "mp3", "m4a", or "wav";
@@ -133,7 +133,7 @@ youtube requires an additional signature since September 2012 which is not suppo
 The error
 
     File "youtube-dl", line 2
 The error
 
     File "youtube-dl", line 2
-    SyntaxError: Non-ASCII character '\x93' ...
+    SyntaxError: Non-ASCII character '\93' ...
 
 means you're using an outdated version of Python. Please update to Python 2.6 or 2.7.
 
 
 means you're using an outdated version of Python. Please update to Python 2.6 or 2.7.
 
index c44fc852c41317ce2fa9da594d1c49e5aedd0d51..d30de69435e728a44ba1c4d6d9d44326e0b995bd 100644 (file)
@@ -3439,7 +3439,7 @@ class YouPornIE(InfoExtractor):
                        return
                self.report_webpage(url)
 
                        return
                self.report_webpage(url)
 
-               # Get the video URL
+               # Get the video title
                result = re.search(self.VIDEO_TITLE_RE, webpage)
                if result is None:
                        self._downloader.trouble(u'ERROR: unable to extract video title')
                result = re.search(self.VIDEO_TITLE_RE, webpage)
                if result is None:
                        self._downloader.trouble(u'ERROR: unable to extract video title')
@@ -3610,3 +3610,87 @@ class PornotubeIE(InfoExtractor):
                return [info]
 
 
                return [info]
 
 
+
+
+class YouJizzIE(InfoExtractor):
+       """Information extractor for youjizz.com.
+
+       Extraction takes two requests: the video page supplies the title and a
+       link to the embed page, and the embed page supplies the media URL.
+       """
+
+       # Video page URLs handled by this extractor.
+       _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/([^.]+)\.html$'
+       IE_NAME = u'youjizz'
+       # Contents of the <title> element of the video page.
+       VIDEO_TITLE_RE = r'<title>(?P<title>.*)</title>'
+       # Link to the embed page; its numeric suffix is used as the video id.
+       EMBED_PAGE_RE = r'http://www\.youjizz\.com/videos/embed/(?P<videoid>[0-9]+)'
+       # Script call on the embed page whose argument is used as the media URL.
+       SOURCE_RE = r'so\.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);'
+
+       def __init__(self, downloader=None):
+               InfoExtractor.__init__(self, downloader)
+
+       @staticmethod
+       def _to_unicode(text):
+               """Return text as unicode, decoding UTF-8 byte strings when needed.
+
+               Text that is already decoded is returned untouched: calling
+               .decode() on it would make Python 2 encode it to ASCII first,
+               which raises UnicodeEncodeError for non-ASCII characters.
+               """
+               if isinstance(text, bytes):
+                       return text.decode('utf-8')
+               return text
+
+       def report_extract_entry(self, url):
+               """Report the media URL that was extracted."""
+               self._downloader.to_screen(u'[youjizz] Downloading entry: %s' % self._to_unicode(url))
+
+       def report_webpage(self, url):
+               """Report that the video page was downloaded."""
+               self._downloader.to_screen(u'[youjizz] Downloaded page: %s' % self._to_unicode(url))
+
+       def report_title(self, video_title):
+               """Report the extracted video title."""
+               self._downloader.to_screen(u'[youjizz] Title: %s' % self._to_unicode(video_title))
+
+       def report_embed_page(self, embed_page):
+               """Report the URL of the embed page."""
+               self._downloader.to_screen(u'[youjizz] Embed Page: %s' % self._to_unicode(embed_page))
+
+       def _real_extract(self, url):
+               """Extract the video information for a youjizz.com video page.
+
+               Returns a list holding a single info dictionary. When a download
+               or a pattern match fails, the error is passed to the downloader's
+               trouble() method and None is returned.
+               """
+               # Get webpage content
+               try:
+                       webpage = urllib2.urlopen(url).read()
+               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err)
+                       return
+               self.report_webpage(url)
+
+               # Get the video title
+               result = re.search(self.VIDEO_TITLE_RE, webpage)
+               if result is None:
+                       self._downloader.trouble(u'ERROR: unable to extract video title')
+                       return
+               video_title = result.group('title').decode('utf-8').strip()
+               self.report_title(video_title)
+
+               # Get the embed page
+               result = re.search(self.EMBED_PAGE_RE, webpage)
+               if result is None:
+                       self._downloader.trouble(u'ERROR: unable to extract embed page')
+                       return
+
+               # The whole match is the embed page URL; the named group is the id.
+               embed_page_url = result.group(0).decode('utf-8').strip()
+               video_id = result.group('videoid').decode('utf-8')
+               self.report_embed_page(embed_page_url)
+
+               try:
+                       webpage = urllib2.urlopen(embed_page_url).read()
+               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                       self._downloader.trouble(u'ERROR: unable to download video embed page: %s' % err)
+                       return
+
+               # Get the video URL
+               result = re.search(self.SOURCE_RE, webpage)
+               if result is None:
+                       self._downloader.trouble(u'ERROR: unable to extract video url')
+                       return
+               video_url = result.group('source').decode('utf-8')
+               self.report_extract_entry(video_url)
+
+               info = {'id': video_id,
+                               'url': video_url,
+                               'uploader': None,
+                               'upload_date': None,
+                               'title': video_title,
+                               'ext': 'flv',
+                               'format': 'flv',
+                               'thumbnail': None,
+                               'description': None,
+                               'player_url': embed_page_url}
+
+               return [info]
+
index 5a2a55b49b450fa25cf1d60e17b792a324cb830d..e99ac2d593747d001f595c5c10e51fd15e1901c4 100644 (file)
@@ -363,7 +363,7 @@ def gen_extractors():
                GooglePlusIE(),
                PornotubeIE(),
                YouPornIE(),
                GooglePlusIE(),
                PornotubeIE(),
                YouPornIE(),
-               
+               YouJizzIE(),
                GenericIE()
        ]
 
                GenericIE()
        ]