2
0
mirror of https://github.com/Nick80835/microbot synced 2025-08-23 18:49:31 +00:00
microbot/ubot/fixes/parallel_download.py

73 lines
2.5 KiB
Python
Raw Normal View History

2020-07-26 10:32:16 -04:00
# SPDX-License-Identifier: GPL-2.0-or-later
import mimetypes
from asyncio import gather
2020-07-26 11:24:53 -04:00
from os import remove
2020-07-26 10:32:16 -04:00
2020-07-26 11:24:53 -04:00
import aiofiles
2020-07-26 10:32:16 -04:00
from aiohttp import ClientSession
class ParallelDownload:
    """Download a single URL as several byte-range chunks.

    Each chunk is streamed into its own part file,
    ``ubot/cache/<file_name>.part<N>``; joining the parts is left to the
    caller.
    """

    def __init__(self, url: str, aioclient: "ClientSession", file_name: str):
        """Store the target URL, the HTTP session to use and the cache base name."""
        self.url = url
        self.aioclient = aioclient
        self.file_name = file_name

    @staticmethod
    def _chunk_ranges(content_length: int, chunk_size: int) -> list:
        """Split ``content_length`` bytes into inclusive ``(start, end)`` pairs.

        Raises:
            ValueError: if ``chunk_size`` is not positive (the split would
                otherwise never terminate).
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")

        # HTTP byte ranges are inclusive on both ends, hence the "- 1"; the
        # last range is clamped so it never runs past the final byte.
        return [
            (start, min(start + chunk_size, content_length) - 1)
            for start in range(0, content_length, chunk_size)
        ]

    @staticmethod
    def _guess_extension(content_type: str) -> str:
        """Map a Content-Type header value to a file extension, or ``""``."""
        # Drop parameters such as "; charset=utf-8" -- the mimetypes lookup
        # only matches the bare "type/subtype" string.
        mime_type = content_type.split(";", 1)[0].strip()
        return mimetypes.guess_extension(mime_type) or ""

    async def download_chunk(self, chunk_start: int, chunk_end: int, total_size: int, chunk_number: int) -> str:
        """Fetch bytes ``chunk_start``..``chunk_end`` (inclusive) into a part file.

        Args:
            chunk_start: Offset of the first byte to request.
            chunk_end: Offset of the last byte to request (inclusive).
            total_size: Full size of the resource. Kept for interface
                compatibility; a ``Range`` request header does not carry it.
            chunk_number: Index used in the part file name.

        Returns:
            Path of the part file that was written.
        """
        part_path = f"ubot/cache/{self.file_name}.part{chunk_number}"

        # "Range" is the request header for partial content; "Content-Range"
        # is a response header and is ignored by servers on a GET.
        # NOTE(review): a server without range support will presumably answer
        # 200 with the whole body -- confirm whether that case needs handling.
        range_headers = {"Range": f"bytes={chunk_start}-{chunk_end}"}

        async with self.aioclient.get(self.url, headers=range_headers) as response:
            async with aiofiles.open(part_path, mode="wb") as cache_file:
                while True:
                    chunk = await response.content.read(4096)

                    if not chunk:
                        break

                    await cache_file.write(chunk)

                await cache_file.flush()

        return part_path

    async def generate_chunk_coros(self, chunk_size: int) -> "tuple[list, str]":
        """Build one ``download_chunk`` coroutine per ``chunk_size`` slice.

        Issues a request to read the ``content-length`` and ``content-type``
        response headers, then plans the byte ranges from them.

        Returns:
            ``(chunk_coros, file_extension)``: the un-awaited coroutines in
            chunk order, and the extension guessed from the content type
            (``""`` when it cannot be determined).

        Raises:
            KeyError: if the response has no ``content-length`` header.
            ValueError: if ``chunk_size`` is not positive.
        """
        async with self.aioclient.get(self.url) as response:
            content_length = int(response.headers["content-length"])
            file_extension = self._guess_extension(response.headers.get("content-type", ""))

        chunk_coros = [
            self.download_chunk(start, end, content_length, chunk_number)
            for chunk_number, (start, end) in enumerate(self._chunk_ranges(content_length, chunk_size))
        ]

        return chunk_coros, file_extension
2020-07-26 11:24:53 -04:00
async def download(url: str, file_name: str, aioclient: "ClientSession | None" = None, chunk_size: int = 5000000) -> str:
    """Download ``url`` in parallel chunks and join them into one cache file.

    Args:
        url: Resource to download.
        file_name: Base name (without extension) of the file under
            ``ubot/cache/``.
        aioclient: Session to use for the requests. When omitted, a session
            is created for this call and closed again afterwards.
        chunk_size: Number of bytes requested per chunk.

    Returns:
        Path of the assembled file, ``ubot/cache/<file_name><extension>``.
    """
    # A ClientSession() default argument would be built once at import time,
    # outside any running event loop, shared by every call and never closed.
    owns_session = aioclient is None

    if owns_session:
        aioclient = ClientSession()

    try:
        downloader = ParallelDownload(url, aioclient, file_name)
        chunk_coros, file_extension = await downloader.generate_chunk_coros(chunk_size)
        downloaded_part_files = await gather(*chunk_coros)
    finally:
        if owns_session:
            await aioclient.close()

    # Guard against a missing extension so the name never ends in "None".
    final_path = f"ubot/cache/{file_name}{file_extension or ''}"

    async with aiofiles.open(final_path, "wb") as final_fh:
        # gather() keeps the order of its arguments, so the parts are
        # appended in chunk order.
        for part_file in downloaded_part_files:
            async with aiofiles.open(part_file, "rb") as part_fh:
                await final_fh.write(await part_fh.read())

            remove(part_file)

        await final_fh.flush()

    return final_path