commit 4560adc820a5d4bda5babc62f0f7fc306b13ad86
parent 63dce3094bf45964b49a2c9f26c94b10cf60c2c2
Author: Sergey M․ <dstftw@gmail.com>
Date:   Tue, 24 Mar 2020 02:43:24 +0700

[teachable] Extract chapter metadata (closes #24421)

Diffstat:
Myoutube_dl/extractor/teachable.py | 25+++++++++++++++++++++++++
1 file changed, 25 insertions(+), 0 deletions(-)

diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py @@ -7,7 +7,9 @@ from .wistia import WistiaIE from ..utils import ( clean_html, ExtractorError, + int_or_none, get_element_by_class, + strip_or_none, urlencode_postdata, urljoin, ) @@ -173,11 +175,34 @@ class TeachableIE(TeachableBaseIE): title = self._og_search_title(webpage, default=None) + chapter = None + chapter_number = None + section_item = self._search_regex( + r'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']%s[^>]+>.+?</li>)' % video_id, + webpage, 'section item', default=None, group='li') + if section_item: + chapter_number = int_or_none(self._search_regex( + r'data-ss-position=["\'](\d+)', section_item, 'section id', + default=None)) + if chapter_number is not None: + sections = [] + for s in re.findall( + r'(?s)<div[^>]+\bclass=["\']section-title[^>]+>(.+?)</div>', webpage): + section = strip_or_none(clean_html(s)) + if not section: + sections = [] + break + sections.append(section) + if chapter_number <= len(sections): + chapter = sections[chapter_number - 1] + entries = [{ '_type': 'url_transparent', 'url': wistia_url, 'ie_key': WistiaIE.ie_key(), 'title': title, + 'chapter': chapter, + 'chapter_number': chapter_number, } for wistia_url in wistia_urls] return self.playlist_result(entries, video_id, title)