added YouJizz extractor

author Jeff Crouse <jefftimesten@gmail.com>

Sun, 16 Dec 2012 05:26:27 +0000 (00:26 -0500)

committer Jeff Crouse <jefftimesten@gmail.com>

Sun, 16 Dec 2012 05:26:27 +0000 (00:26 -0500)
author Jeff Crouse <jefftimesten@gmail.com>
Sun, 16 Dec 2012 05:26:27 +0000 (00:26 -0500)
committer Jeff Crouse <jefftimesten@gmail.com>
Sun, 16 Dec 2012 05:26:27 +0000 (00:26 -0500)
diff --git a/README.md b/README.md

index 1a96bd98d858ef11a790b5648157c3da1cb642db..0ac75f5d7f6905c59de466bf1ba46e5c549da3d0 100644 (file)
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ which means you can modify it, redistribute it or use it however you like.
      --list-extractors        List all supported extractors and the URLs they
                               would handle
  
      --list-extractors        List all supported extractors and the URLs they
                               would handle
  
-## Video Selection:
+  Video Selection:
      --playlist-start NUMBER  playlist video to start at (default is 1)
      --playlist-end NUMBER    playlist video to end at (default is last)
      --match-title REGEX      download only matching titles (regex or caseless
      --playlist-start NUMBER  playlist video to start at (default is 1)
      --playlist-end NUMBER    playlist video to end at (default is last)
      --match-title REGEX      download only matching titles (regex or caseless
@@ -34,7 +34,7 @@ which means you can modify it, redistribute it or use it however you like.
                               caseless sub-string)
      --max-downloads NUMBER   Abort after downloading NUMBER files
  
                               caseless sub-string)
      --max-downloads NUMBER   Abort after downloading NUMBER files
  
-## Filesystem Options:
+  Filesystem Options:
      -t, --title              use title in file name
      --id                     use video ID in file name
      -l, --literal            use literal title in file name
      -t, --title              use title in file name
      --id                     use video ID in file name
      -l, --literal            use literal title in file name
@@ -59,7 +59,7 @@ which means you can modify it, redistribute it or use it however you like.
      --write-description      write video description to a .description file
      --write-info-json        write video metadata to a .info.json file
  
      --write-description      write video description to a .description file
      --write-info-json        write video metadata to a .info.json file
  
-## Verbosity / Simulation Options:
+  Verbosity / Simulation Options:
      -q, --quiet              activates quiet mode
      -s, --simulate           do not download the video and do not write anything
                               to disk
      -q, --quiet              activates quiet mode
      -s, --simulate           do not download the video and do not write anything
                               to disk
@@ -74,7 +74,7 @@ which means you can modify it, redistribute it or use it however you like.
      --console-title          display progress in console titlebar
      -v, --verbose            print various debugging information
  
      --console-title          display progress in console titlebar
      -v, --verbose            print various debugging information
  
-## Video Format Options:
+  Video Format Options:
      -f, --format FORMAT      video format code
      --all-formats            download all available video formats
      --prefer-free-formats    prefer free video formats unless a specific one is
      -f, --format FORMAT      video format code
      --all-formats            download all available video formats
      --prefer-free-formats    prefer free video formats unless a specific one is
@@ -86,12 +86,12 @@ which means you can modify it, redistribute it or use it however you like.
      --srt-lang LANG          language of the closed captions to download
                               (optional) use IETF language tags like 'en'
  
      --srt-lang LANG          language of the closed captions to download
                               (optional) use IETF language tags like 'en'
  
-## Authentication Options:
+  Authentication Options:
      -u, --username USERNAME  account username
      -p, --password PASSWORD  account password
      -n, --netrc              use .netrc authentication data
  
      -u, --username USERNAME  account username
      -p, --password PASSWORD  account password
      -n, --netrc              use .netrc authentication data
  
-## Post-processing Options:
+  Post-processing Options:
      -x, --extract-audio      convert video files to audio-only files (requires
                               ffmpeg or avconv and ffprobe or avprobe)
      --audio-format FORMAT    "best", "aac", "vorbis", "mp3", "m4a", or "wav";
      -x, --extract-audio      convert video files to audio-only files (requires
                               ffmpeg or avconv and ffprobe or avprobe)
      --audio-format FORMAT    "best", "aac", "vorbis", "mp3", "m4a", or "wav";
@@ -133,7 +133,7 @@ youtube requires an additional signature since September 2012 which is not suppo
  The error
  
      File "youtube-dl", line 2
  The error
  
      File "youtube-dl", line 2
-    SyntaxError: Non-ASCII character '\x93' ...
+    SyntaxError: Non-ASCII character '\93' ...
  
  means you're using an outdated version of Python. Please update to Python 2.6 or 2.7.
  
  
  means you're using an outdated version of Python. Please update to Python 2.6 or 2.7.
  
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index c44fc852c41317ce2fa9da594d1c49e5aedd0d51..d30de69435e728a44ba1c4d6d9d44326e0b995bd 100644 (file)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -3439,7 +3439,7 @@ class YouPornIE(InfoExtractor):
                         return
                 self.report_webpage(url)
  
                         return
                 self.report_webpage(url)
  
-               # Get the video URL
+               # Get the video title
                 result = re.search(self.VIDEO_TITLE_RE, webpage)
                 if result is None:
                         self._downloader.trouble(u'ERROR: unable to extract video title')
                 result = re.search(self.VIDEO_TITLE_RE, webpage)
                 if result is None:
                         self._downloader.trouble(u'ERROR: unable to extract video title')
@@ -3610,3 +3610,87 @@ class PornotubeIE(InfoExtractor):
                 return [info]
  
  
                 return [info]
  
  
+
+
+class YouJizzIE(InfoExtractor):
+       """Information extractor for youjizz.com."""
+
+       _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/([^.]+).html$'
+       IE_NAME = u'youjizz'
+       VIDEO_TITLE_RE = r'<title>(?P<title>.*)</title>'
+       EMBED_PAGE_RE = r'http://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)'
+       SOURCE_RE = r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);'
+
+       def __init__(self, downloader=None):
+               InfoExtractor.__init__(self, downloader)
+
+       def report_extract_entry(self, url):
+               """Report downloading entry"""
+               self._downloader.to_screen(u'[youjizz] Downloading entry: %s' % url.decode('utf-8'))
+
+       def report_webpage(self, url):
+               """Report downloading page"""
+               self._downloader.to_screen(u'[youjizz] Downloaded page: %s' % url)
+
+       def report_title(self, video_title):
+               """Report the extracted video title"""
+               self._downloader.to_screen(u'[youjizz] Title: %s' % video_title.decode('utf-8'))
+
+       def report_embed_page(self, embed_page):
+               """Report the embed page URL"""
+               self._downloader.to_screen(u'[youjizz] Embed Page: %s' % embed_page.decode('utf-8'))
+
+       def _real_extract(self, url):
+               # Get webpage content
+               try:
+                       webpage = urllib2.urlopen(url).read()
+               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err)
+                       return
+               self.report_webpage(url)
+
+               # Get the video title
+               result = re.search(self.VIDEO_TITLE_RE, webpage)
+               if result is None:
+                       self._downloader.trouble(u'ERROR: unable to extract video title')
+                       return
+               video_title = result.group('title').decode('utf-8').strip()
+               self.report_title(video_title)
+
+               # Get the embed page
+               result = re.search(self.EMBED_PAGE_RE, webpage)
+               if result is None:
+                       self._downloader.trouble(u'ERROR: unable to extract embed page')
+                       return
+
+               embed_page_url = result.group(0).decode('utf-8').strip()
+               video_id = result.group('videoid').decode('utf-8')
+               self.report_embed_page(embed_page_url)
+       
+               try:
+                       webpage = urllib2.urlopen(embed_page_url).read()
+               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                       self._downloader.trouble(u'ERROR: unable to download video embed page: %s' % err)
+                       return
+               
+               # Get the video URL
+               result = re.search(self.SOURCE_RE, webpage)
+               if result is None:
+                       self._downloader.trouble(u'ERROR: unable to extract video url')
+                       return
+               video_url = result.group('source').decode('utf-8')
+               self.report_extract_entry(video_url)
+
+               info = {'id': video_id,
+                               'url': video_url,
+                               'uploader': None,
+                               'upload_date': None,
+                               'title': video_title,
+                               'ext': 'flv',
+                               'format': 'flv',
+                               'thumbnail': None,
+                               'description': None,
+                               'player_url': embed_page_url}
+
+               return [info]
+
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index 5a2a55b49b450fa25cf1d60e17b792a324cb830d..e99ac2d593747d001f595c5c10e51fd15e1901c4 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -363,7 +363,7 @@ def gen_extractors():
                 GooglePlusIE(),
                 PornotubeIE(),
                 YouPornIE(),
                 GooglePlusIE(),
                 PornotubeIE(),
                 YouPornIE(),
-               
+               YouJizzIE(),
                 GenericIE()
         ]
  
                 GenericIE()
         ]
author	Jeff Crouse <jefftimesten@gmail.com>
	Sun, 16 Dec 2012 05:26:27 +0000 (00:26 -0500)
committer	Jeff Crouse <jefftimesten@gmail.com>
	Sun, 16 Dec 2012 05:26:27 +0000 (00:26 -0500)
README.md		patch \| blob \| history
youtube_dl/InfoExtractors.py		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history