commit 274bf5e4c58bceed4ff8c283d77457bf1cb76d3e
parent e993f1a0959fc04507b1cb2efeb610ae628d6d98
Author: Remita Amine <remitamine@gmail.com>
Date:   Fri,  1 Nov 2019 11:37:41 +0100

[kakao] improve extraction

- support embed URLs
- support Kakao Legacy vid based embed URLs
- only extract fields used for extraction
- strip description and extract tags

Diffstat:
M youtube_dl/extractor/kakao.py | 45 ++++++++++++++++++++++++---------------------
1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py
@@ -6,14 +6,15 @@ from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
     int_or_none,
+    strip_or_none,
     unified_timestamp,
     update_url_query,
 )
 
 
 class KakaoIE(InfoExtractor):
-    _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
-    _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'
+    _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)'
+    _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/'
 
     _TESTS = [{
         'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
@@ -36,7 +37,7 @@ class KakaoIE(InfoExtractor):
             'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
             'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
             'uploader_id': 2653210,
-            'uploader': '쇼 음악중심',
+            'uploader': '쇼! 음악중심',
             'timestamp': 1485684628,
             'upload_date': '20170129',
         }
@@ -44,6 +45,8 @@ class KakaoIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        display_id = video_id.rstrip('@my')
+        api_base = self._API_BASE_TMPL % video_id
 
         player_header = {
             'Referer': update_url_query(
@@ -55,20 +58,22 @@ class KakaoIE(InfoExtractor):
                 })
         }
 
-        QUERY_COMMON = {
+        query = {
             'player': 'monet_html5',
             'referer': url,
             'uuid': '',
             'service': 'kakao_tv',
             'section': '',
             'dteType': 'PC',
+            'fields': ','.join([
+                '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
+                'description', 'channelId', 'createTime', 'duration', 'playCount',
+                'likeCount', 'commentCount', 'tagList', 'channel', 'name',
+                'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault'])
         }
 
-        query = QUERY_COMMON.copy()
-        query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
         impress = self._download_json(
-            '%s/%s/impress' % (self._API_BASE, video_id),
-            video_id, 'Downloading video info',
+            api_base + 'impress', display_id, 'Downloading video info',
             query=query, headers=player_header)
 
         clip_link = impress['clipLink']
@@ -78,30 +83,27 @@ class KakaoIE(InfoExtractor):
 
         tid = impress.get('tid', '')
 
-        query = QUERY_COMMON.copy()
         query.update({
+            'fields': '-*,outputList,profile,width,height,label,filesize',
             'tid': tid,
             'profile': 'HIGH',
         })
         raw = self._download_json(
-            '%s/%s/raw' % (self._API_BASE, video_id),
-            video_id, 'Downloading video formats info',
+            api_base + 'raw', display_id, 'Downloading video formats info',
             query=query, headers=player_header)
 
         formats = []
         for fmt in raw.get('outputList', []):
             try:
                 profile_name = fmt['profile']
+                query.update({
+                    'profile': profile_name,
+                    'fields': '-*,url',
+                })
                 fmt_url_json = self._download_json(
-                    '%s/%s/raw/videolocation' % (self._API_BASE, video_id),
-                    video_id,
+                    api_base + 'raw/videolocation', display_id,
                     'Downloading video URL for profile %s' % profile_name,
-                    query={
-                        'service': 'kakao_tv',
-                        'section': '',
-                        'tid': tid,
-                        'profile': profile_name
-                    }, headers=player_header, fatal=False)
+                    query=query, headers=player_header, fatal=False)
 
                 if fmt_url_json is None:
                     continue
@@ -134,9 +136,9 @@ class KakaoIE(InfoExtractor):
             })
 
         return {
-            'id': video_id,
+            'id': display_id,
             'title': title,
-            'description': clip.get('description'),
+            'description': strip_or_none(clip.get('description')),
             'uploader': clip_link.get('channel', {}).get('name'),
             'uploader_id': clip_link.get('channelId'),
             'thumbnails': thumbs,
@@ -146,4 +148,5 @@ class KakaoIE(InfoExtractor):
             'like_count': int_or_none(clip.get('likeCount')),
             'comment_count': int_or_none(clip.get('commentCount')),
             'formats': formats,
+            'tags': clip.get('tagList'),
         }