commit 6471d0d3b8086b282622c84a9eea968d4edfcf9b
parent 5ef62fc4ce1f255343d67b70f3cee2f2240cdfba
Author: Remita Amine <remitamine@gmail.com>
Date:   Tue, 26 Nov 2019 23:57:37 +0100

[openload] remove OpenLoad related extractors (closes #11999) (closes #15406)

Diffstat:
M youtube_dl/extractor/extractors.py | 5 -----
M youtube_dl/extractor/generic.py | 16 ----------------
M youtube_dl/extractor/openload.py | 263 -------------------------------------------------------------------------------
D youtube_dl/extractor/streamango.py | 128 -------------------------------------------------------------------------------
4 files changed, 0 insertions(+), 412 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py @@ -796,10 +796,6 @@ from .ooyala import ( OoyalaIE, OoyalaExternalIE, ) -from .openload import ( - OpenloadIE, - VerystreamIE, -) from .ora import OraTVIE from .orf import ( ORFTVthekIE, @@ -1060,7 +1056,6 @@ from .srmediathek import SRMediathekIE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamable import StreamableIE -from .streamango import StreamangoIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py @@ -88,10 +88,6 @@ from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE -from .openload import ( - OpenloadIE, - VerystreamIE, -) from .videopress import VideoPressIE from .rutube import RutubeIE from .limelight import LimelightBaseIE @@ -3048,18 +3044,6 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key()) - # Look for Openload embeds - openload_urls = OpenloadIE._extract_urls(webpage) - if openload_urls: - return self.playlist_from_matches( - openload_urls, video_id, video_title, ie=OpenloadIE.ie_key()) - - # Look for Verystream embeds - verystream_urls = VerystreamIE._extract_urls(webpage) - if verystream_urls: - return self.playlist_from_matches( - verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key()) - # Look for VideoPress embeds videopress_urls = VideoPressIE._extract_urls(webpage) if videopress_urls: diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py @@ -3,21 +3,17 @@ from __future__ import unicode_literals import json import os -import re import subprocess import tempfile -from .common import InfoExtractor from ..compat import ( compat_urlparse, compat_kwargs, ) from ..utils import ( 
check_executable, - determine_ext, encodeArgument, ExtractorError, - get_element_by_id, get_exe_version, is_outdated_version, std_headers, @@ -240,262 +236,3 @@ class PhantomJSwrapper(object): self._load_cookies() return (html, encodeArgument(out)) - - -class OpenloadIE(InfoExtractor): - _DOMAINS = r''' - (?: - openload\.(?:co|io|link|pw)| - oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)| - oladblock\.(?:services|xyz|me)|openloed\.co - ) - ''' - _VALID_URL = r'''(?x) - https?:// - (?P<host> - (?:www\.)? - %s - )/ - (?:f|embed)/ - (?P<id>[a-zA-Z0-9-_]+) - ''' % _DOMAINS - _EMBED_WORD = 'embed' - _STREAM_WORD = 'f' - _REDIR_WORD = 'stream' - _URL_IDS = ('streamurl', 'streamuri', 'streamurj') - _TESTS = [{ - 'url': 'https://openload.co/f/kUEfGclsU9o', - 'md5': 'bf1c059b004ebc7a256f89408e65c36e', - 'info_dict': { - 'id': 'kUEfGclsU9o', - 'ext': 'mp4', - 'title': 'skyrim_no-audio_1080.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }, { - 'url': 'https://openload.co/embed/rjC09fkPLYs', - 'info_dict': { - 'id': 'rjC09fkPLYs', - 'ext': 'mp4', - 'title': 'movie.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - 'subtitles': { - 'en': [{ - 'ext': 'vtt', - }], - }, - }, - 'params': { - 'skip_download': True, # test subtitles only - }, - }, { - 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', - 'only_matching': True, - }, { - 'url': 'https://openload.io/f/ZAn6oz-VZGE/', - 'only_matching': True, - }, { - 'url': 'https://openload.co/f/_-ztPaZtMhM/', - 'only_matching': True, - }, { - # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout - # for title and ext - 'url': 'https://openload.co/embed/Sxz5sADo82g/', - 'only_matching': True, - }, { - # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available - # via https://openload.co/f/e-Ixz9ZR5L0/ - 'url': 'https://openload.co/f/e-Ixz9ZR5L0/', - 'only_matching': True, - }, { - 'url': 
'https://oload.tv/embed/KnG-kKZdcfY/', - 'only_matching': True, - }, { - 'url': 'http://www.openload.link/f/KnG-kKZdcfY', - 'only_matching': True, - }, { - 'url': 'https://oload.stream/f/KnG-kKZdcfY', - 'only_matching': True, - }, { - 'url': 'https://oload.xyz/f/WwRBpzW8Wtk', - 'only_matching': True, - }, { - 'url': 'https://oload.win/f/kUEfGclsU9o', - 'only_matching': True, - }, { - 'url': 'https://oload.download/f/kUEfGclsU9o', - 'only_matching': True, - }, { - 'url': 'https://oload.cloud/f/4ZDnBXRWiB8', - 'only_matching': True, - }, { - # Its title has not got its extension but url has it - 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', - 'only_matching': True, - }, { - 'url': 'https://oload.cc/embed/5NEAbI2BDSk', - 'only_matching': True, - }, { - 'url': 'https://oload.icu/f/-_i4y_F_Hs8', - 'only_matching': True, - }, { - 'url': 'https://oload.fun/f/gb6G1H4sHXY', - 'only_matching': True, - }, { - 'url': 'https://oload.club/f/Nr1L-aZ2dbQ', - 'only_matching': True, - }, { - 'url': 'https://oload.info/f/5NEAbI2BDSk', - 'only_matching': True, - }, { - 'url': 'https://openload.pw/f/WyKgK8s94N0', - 'only_matching': True, - }, { - 'url': 'https://oload.pw/f/WyKgK8s94N0', - 'only_matching': True, - }, { - 'url': 'https://oload.live/f/-Z58UZ-GR4M', - 'only_matching': True, - }, { - 'url': 'https://oload.space/f/IY4eZSst3u8/', - 'only_matching': True, - }, { - 'url': 'https://oload.services/embed/bs1NWj1dCag/', - 'only_matching': True, - }, { - 'url': 'https://oload.online/f/W8o2UfN1vNY/', - 'only_matching': True, - }, { - 'url': 'https://oload.monster/f/W8o2UfN1vNY/', - 'only_matching': True, - }, { - 'url': 'https://oload.press/embed/drTBl1aOTvk/', - 'only_matching': True, - }, { - 'url': 'https://oload.website/embed/drTBl1aOTvk/', - 'only_matching': True, - }, { - 'url': 'https://oload.life/embed/oOzZjNPw9Dc/', - 'only_matching': True, - }, { - 'url': 'https://oload.biz/f/bEk3Gp8ARr4/', - 'only_matching': True, - }, { - 
'url': 'https://oload.best/embed/kkz9JgVZeWc/', - 'only_matching': True, - }, { - 'url': 'https://oladblock.services/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://oladblock.me/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://openloed.co/f/b8NWEgkqNLI/', - 'only_matching': True, - }, { - 'url': 'https://oload.vip/f/kUEfGclsU9o', - 'only_matching': True, - }] - - @classmethod - def _extract_urls(cls, webpage): - return re.findall( - r'(?x)<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)' - % (cls._DOMAINS, cls._EMBED_WORD), webpage) - - def _extract_decrypted_page(self, page_url, webpage, video_id): - phantom = PhantomJSwrapper(self, required_version='2.0') - webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id) - return webpage - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - video_id = mobj.group('id') - - url_pattern = 'https://%s/%%s/%s/' % (host, video_id) - - for path in (self._EMBED_WORD, self._STREAM_WORD): - page_url = url_pattern % path - last = path == self._STREAM_WORD - webpage = self._download_webpage( - page_url, video_id, 'Downloading %s webpage' % path, - fatal=last) - if not webpage: - continue - if 'File not found' in webpage or 'deleted by the owner' in webpage: - if not last: - continue - raise ExtractorError('File not found', expected=True, video_id=video_id) - break - - webpage = self._extract_decrypted_page(page_url, webpage, video_id) - for element_id in self._URL_IDS: - decoded_id = get_element_by_id(element_id, webpage) - if decoded_id: - break - if not decoded_id: - decoded_id = self._search_regex( - (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<', - r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)', - r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<', - r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<', - r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage, - 
'stream URL') - video_url = 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id) - - title = self._og_search_title(webpage, default=None) or self._search_regex( - r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, - 'title', default=None) or self._html_search_meta( - 'description', webpage, 'title', fatal=True) - - entries = self._parse_html5_media_entries(page_url, webpage, video_id) - entry = entries[0] if entries else {} - subtitles = entry.get('subtitles') - - return { - 'id': video_id, - 'title': title, - 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), - 'url': video_url, - 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'), - 'subtitles': subtitles, - } - - -class VerystreamIE(OpenloadIE): - IE_NAME = 'verystream' - - _DOMAINS = r'(?:verystream\.com|woof\.tube)' - _VALID_URL = r'''(?x) - https?:// - (?P<host> - (?:www\.)? - %s - )/ - (?:stream|e)/ - (?P<id>[a-zA-Z0-9-_]+) - ''' % _DOMAINS - _EMBED_WORD = 'e' - _STREAM_WORD = 'stream' - _REDIR_WORD = 'gettoken' - _URL_IDS = ('videolink', ) - _TESTS = [{ - 'url': 'https://verystream.com/stream/c1GWQ9ngBBx/', - 'md5': 'd3e8c5628ccb9970b65fd65269886795', - 'info_dict': { - 'id': 'c1GWQ9ngBBx', - 'ext': 'mp4', - 'title': 'Big Buck Bunny.mp4', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }, { - 'url': 'https://verystream.com/e/c1GWQ9ngBBx/', - 'only_matching': True, - }] - - def _extract_decrypted_page(self, page_url, webpage, video_id): - return webpage # for Verystream, the webpage is already decrypted diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py @@ -1,128 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import compat_chr -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - js_to_json, -) - - -class StreamangoIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4', - 'md5': 'e992787515a182f55e38fc97588d802a', - 'info_dict': { - 'id': 'clapasobsptpkdfe', - 'ext': 'mp4', - 'title': '20170315_150006.mp4', - } - }, { - # no og:title - 'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4', - 'info_dict': { - 'id': 'foqebrpftarclpob', - 'ext': 'mp4', - 'title': 'foqebrpftarclpob', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'gone', - }, { - 'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4', - 'only_matching': True, - }, { - 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4', - 'only_matching': True, - }, { - 'url': 'https://streamcherry.com/f/clapasobsptpkdfe/', - 'only_matching': True, - }] - - def _real_extract(self, url): - def decrypt_src(encoded, val): - ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA' - encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded) - decoded = '' - sm = [None] * 4 - i = 0 - str_len = len(encoded) - while i < str_len: - for j in range(4): - sm[j % 4] = ALPHABET.index(encoded[i]) - i += 1 - char_code = ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val - decoded += compat_chr(char_code) - if sm[2] != 0x40: - char_code = ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2) - decoded += compat_chr(char_code) - if sm[3] != 0x40: - char_code = ((sm[2] & 0x3) << 0x6) | sm[3] - decoded += compat_chr(char_code) - return decoded - - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - title = self._og_search_title(webpage, default=video_id) - - formats = [] - for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage): - mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_) - if mobj is None: - continue - - format_ = format_.replace(mobj.group(0), '') - - video = 
self._parse_json( - format_, video_id, transform_source=js_to_json, - fatal=False) or {} - - mobj = re.search( - r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)', - mobj.group(1)) - if mobj is None: - continue - - src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val'))) - if not src: - continue - - ext = determine_ext(src, default_ext=None) - if video.get('type') == 'application/dash+xml' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - src, video_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'url': src, - 'ext': ext or 'mp4', - 'width': int_or_none(video.get('width')), - 'height': int_or_none(video.get('height')), - 'tbr': int_or_none(video.get('bitrate')), - }) - - if not formats: - error = self._search_regex( - r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage, - 'error', default=None) - if not error and '>Sorry' in webpage: - error = 'Video %s is not available' % video_id - if error: - raise ExtractorError(error, expected=True) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'url': url, - 'title': title, - 'formats': formats, - }