[extractor/common] Improve _json_ld for articles

[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 35d427eec974a14ff27425780c13ae18be1f3aa7..deafb48508fc7a0def88e5bd23fab558d37d8213 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1027,7 +1027,7 @@ class InfoExtractor(object):
                      part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
                      if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
                          info['series'] = unescapeHTML(part_of_series.get('name'))
-                elif item_type == 'Article':
+                elif item_type in ('Article', 'NewsArticle'):
                      info.update({
                          'timestamp': parse_iso8601(e.get('datePublished')),
                          'title': unescapeHTML(e.get('headline')),
@@ -1880,6 +1880,7 @@ class InfoExtractor(object):
                              'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
                              'format_note': 'DASH %s' % content_type,
                              'filesize': filesize,
+                            'container': mimetype2ext(mime_type) + '_dash',
                          }
                          f.update(parse_codecs(representation_attrib.get('codecs')))
                          representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
@@ -2054,7 +2055,7 @@ class InfoExtractor(object):
              stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
              stream_name = stream.get('Name')
              for track in stream.findall('QualityLevel'):
-                fourcc = track.get('FourCC')
+                fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
                  # TODO: add support for WVC1 and WMAP
                  if fourcc not in ('H264', 'AVC1', 'AACL'):
                      self.report_warning('%s is not a supported codec' % fourcc)
@@ -2403,7 +2404,7 @@ class InfoExtractor(object):
                  formats.extend(self._extract_m3u8_formats(
                      source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                      m3u8_id=m3u8_id, fatal=False))
-            elif ext == 'mpd':
+            elif source_type == 'dash' or ext == 'mpd':
                  formats.extend(self._extract_mpd_formats(
                      source_url, video_id, mpd_id=mpd_id, fatal=False))
              elif ext == 'smil':