projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
33cc1ea
)
[nrktv:season,series] Fix extraction and update tests (closes #17159, closes #17258)
author Sergey M․ <dstftw@gmail.com> Thu, 6 Dec 2018 17:49:24 +0000 (00:49 +0700)
committer Sergey M․ <dstftw@gmail.com> Thu, 6 Dec 2018 17:49:24 +0000 (00:49 +0700)
youtube_dl/extractor/nrk.py
patch
|
blob
|
history
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index c5001ef4869f13c7a5f95c0f904715e0f7c9825f..48bc6fd7a643e682f5aab16d83ac6f31e0ed9f58 100644
(file)
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -211,13 +211,13 @@ class NRKIE(NRKBaseIE):
_TESTS = [{
# video
'url': 'http://www.nrk.no/video/PS*150533',
_TESTS = [{
# video
'url': 'http://www.nrk.no/video/PS*150533',
- 'md5': '2f7f6eeb2aacdd99885f355428715cfa',
+ 'md5': '706f34cdf1322577589e369e522b50ef',
'info_dict': {
'id': '150533',
'ext': 'mp4',
'title': 'Dompap og andre fugler i Piip-Show',
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
'info_dict': {
'id': '150533',
'ext': 'mp4',
'title': 'Dompap og andre fugler i Piip-Show',
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
- 'duration': 263,
+ 'duration': 262,
}
}, {
# audio
}
}, {
# audio
@@ -256,14 +256,14 @@ class NRKTVIE(NRKBaseIE):
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
- 'md5': '4e9ca6629f09e588ed240fb11619922a',
+ 'md5': '9a167e54d04671eb6317a37b7bc8a280',
'info_dict': {
'id': 'MUHH48000314AA',
'ext': 'mp4',
'title': '20 spørsmål 23.05.2014',
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
'duration': 1741,
'info_dict': {
'id': 'MUHH48000314AA',
'ext': 'mp4',
'title': '20 spørsmål 23.05.2014',
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
'duration': 1741,
- 'series': '20 spørsmål - TV',
+ 'series': '20 spørsmål',
'episode': '23.05.2014',
},
}, {
'episode': '23.05.2014',
},
}, {
@@ -301,7 +301,7 @@ class NRKTVIE(NRKBaseIE):
'id': 'MSPO40010515AH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
'id': 'MSPO40010515AH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
- 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 772,
'series': 'Tour de Ski',
'episode': '06.01.2015',
'duration': 772,
'series': 'Tour de Ski',
'episode': '06.01.2015',
@@ -314,7 +314,7 @@ class NRKTVIE(NRKBaseIE):
'id': 'MSPO40010515BH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
'id': 'MSPO40010515BH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
- 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 6175,
'series': 'Tour de Ski',
'episode': '06.01.2015',
'duration': 6175,
'series': 'Tour de Ski',
'episode': '06.01.2015',
@@ -326,7 +326,7 @@ class NRKTVIE(NRKBaseIE):
'info_dict': {
'id': 'MSPO40010515',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
'info_dict': {
'id': 'MSPO40010515',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
- 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
},
'expected_warnings': ['Video is geo restricted'],
}, {
},
'expected_warnings': ['Video is geo restricted'],
}, {
@@ -406,21 +406,35 @@ class NRKTVSerieBaseIE(InfoExtractor):
def _extract_series(self, webpage, display_id, fatal=True):
config = self._parse_json(
self._search_regex(
def _extract_series(self, webpage, display_id, fatal=True):
config = self._parse_json(
self._search_regex(
- r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
- default='{}' if not fatal else NO_DEFAULT),
+ (r'INITIAL_DATA_*\s*=\s*({.+?})\s*;',
+ r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
+ webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
display_id, fatal=False)
if not config:
return
display_id, fatal=False)
if not config:
return
- return try_get(config, lambda x: x['series'], dict)
+ return try_get(
+ config,
+ (lambda x: x['initialState']['series'], lambda x: x['series']),
+ dict)
+
+ def _extract_seasons(self, seasons):
+ if not isinstance(seasons, list):
+ return []
+ entries = []
+ for season in seasons:
+ entries.extend(self._extract_episodes(season))
+ return entries
def _extract_episodes(self, season):
def _extract_episodes(self, season):
- entries = []
if not isinstance(season, dict):
if not isinstance(season, dict):
- return entries
- episodes = season.get('episodes')
- if not isinstance(episodes, list):
- return entries
- for episode in episodes:
+ return []
+ return self._extract_entries(season.get('episodes'))
+
+ def _extract_entries(self, entry_list):
+ if not isinstance(entry_list, list):
+ return []
+ entries = []
+ for episode in entry_list:
nrk_id = episode.get('prfId')
if not nrk_id or not isinstance(nrk_id, compat_str):
continue
nrk_id = episode.get('prfId')
if not nrk_id or not isinstance(nrk_id, compat_str):
continue
@@ -465,7 +479,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
_TESTS = [{
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
_TESTS = [{
- # new layout
+ # new layout, seasons
'url': 'https://tv.nrk.no/serie/backstage',
'info_dict': {
'id': 'backstage',
'url': 'https://tv.nrk.no/serie/backstage',
'info_dict': {
'id': 'backstage',
@@ -474,20 +488,21 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
},
'playlist_mincount': 60,
}, {
},
'playlist_mincount': 60,
}, {
- # old layout
+ # new layout, instalments
'url': 'https://tv.nrk.no/serie/groenn-glede',
'info_dict': {
'id': 'groenn-glede',
'title': 'Grønn glede',
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
},
'url': 'https://tv.nrk.no/serie/groenn-glede',
'info_dict': {
'id': 'groenn-glede',
'title': 'Grønn glede',
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
},
- 'playlist_mincount': 9,
+ 'playlist_mincount': 10,
}, {
}, {
- 'url': 'http://tv.nrksuper.no/serie/labyrint',
+ # old layout
+ 'url': 'https://tv.nrksuper.no/serie/labyrint',
'info_dict': {
'id': 'labyrint',
'title': 'Labyrint',
'info_dict': {
'id': 'labyrint',
'title': 'Labyrint',
- 'description': 'md5:58afd450974c89e27d5a19212eee7115',
+ 'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
},
'playlist_mincount': 3,
}, {
},
'playlist_mincount': 3,
}, {
@@ -520,11 +535,11 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
description = try_get(
series, lambda x: x['titles']['subtitle'], compat_str)
entries = []
description = try_get(
series, lambda x: x['titles']['subtitle'], compat_str)
entries = []
- for season in series['seasons']:
- entries.extend(self._extract_episodes(season))
+ entries.extend(self._extract_seasons(series.get('seasons')))
+ entries.extend(self._extract_entries(series.get('instalments')))
return self.playlist_result(entries, series_id, title, description)
return self.playlist_result(entries, series_id, title, description)
- # Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
+ # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
entries = [
self.url_result(
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
entries = [
self.url_result(
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
@@ -536,6 +551,9 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
'seriestitle', webpage,
'title', default=None) or self._og_search_title(
webpage, fatal=False)
'seriestitle', webpage,
'title', default=None) or self._og_search_title(
webpage, fatal=False)
+ if title:
+ title = self._search_regex(
+ r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
description = self._html_search_meta(
'series_description', webpage,
description = self._html_search_meta(
'series_description', webpage,
@@ -596,7 +614,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):
'title': 'Rivertonprisen til Karin Fossum',
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
},
'title': 'Rivertonprisen til Karin Fossum',
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
},
- 'playlist_count': 5,
+ 'playlist_count': 2,
}]
def _extract_title(self, webpage):
}]
def _extract_title(self, webpage):