commit 85750f897293b5a56e6be521f8b0be3eec082899
parent 926d97fc6b018a25ea777dfcfb9a84a10920c2b7
Author: Enes <enessolak99@gmail.com>
Date:   Fri,  1 Jun 2018 20:16:22 +0300

[openload] Improve ext extraction


Diffstat:
M test/test_utils.py | 1 +
M youtube_dl/extractor/openload.py | 7 +++++--
M youtube_dl/utils.py | 2 +-
3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py @@ -361,6 +361,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8') + self.assertEqual(determine_ext('foobar', None), None) def test_find_xpath_attr(self): testxml = '''<root> diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py @@ -307,6 +307,10 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.download/f/kUEfGclsU9o', 'only_matching': True, + }, { + # Its title has not got its extension but url has it + 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' @@ -368,8 +372,7 @@ class OpenloadIE(InfoExtractor): 'title': title, 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), 'url': video_url, - # Seems all videos have extensions in their titles - 'ext': determine_ext(title, 'mp4'), + 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'), 'subtitles': subtitles, 'http_headers': headers, } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py @@ -1228,7 +1228,7 @@ def unified_timestamp(date_str, day_first=True): def determine_ext(url, default_ext='unknown_video'): - if url is None: + if url is None or '.' not in url: return default_ext guess = url.partition('?')[0].rpartition('.')[2] if re.match(r'^[A-Za-z0-9]+$', guess):