diff options
author | Radu Carpa <radu.carpa@cern.ch> | 2023-08-03 09:15:23 +0200 |
---|---|---|
committer | Calum Lind <calumlind+deluge@gmail.com> | 2023-11-20 11:05:39 +0100 |
commit | 1751d62df9dc41adcd3caff2bb6a0c0d824a4fc5 (patch) | |
tree | 99cc405c41828d29424d63ad35bd6c605e19d215 | |
parent | [Core] Make create_torrent return a deferred (diff) | |
download | deluge-1751d62df9dc41adcd3caff2bb6a0c0d824a4fc5.tar.xz deluge-1751d62df9dc41adcd3caff2bb6a0c0d824a4fc5.zip |
[Core] Support creating v2 torrents
Add support for v2 torrents in create_torrent, but keep the old
default of only adding the v1 metadata.
Unify the single-file and directory cases to avoid code
duplication.
V2 torrents require files to be piece-aligned. The same applies to
hybrid v1/v2 ones. To handle both cases of piece-aligned and
non-aligned files, always read the files in piece-aligned
chunks. Re-slice the buffer if needed (for v1-only multi-file
torrents).
Also, the progress event had to be adapted. It now depends on the
number of bytes hashed rather than the number of pieces. To
avoid sending an excessive number of events when handling a
directory with many small files, add a mechanism to limit
events to at most one per piece_length bytes hashed.
Closes: https://github.com/deluge-torrent/deluge/pull/430
-rw-r--r-- | deluge/core/core.py | 9 | ||||
-rw-r--r-- | deluge/metafile.py | 351 | ||||
-rw-r--r-- | deluge/tests/test_metafile.py | 55 |
3 files changed, 328 insertions, 87 deletions
diff --git a/deluge/core/core.py b/deluge/core/core.py index fc12f7bc5..e2130f595 100644 --- a/deluge/core/core.py +++ b/deluge/core/core.py @@ -21,7 +21,7 @@ from twisted.web.client import Agent, readBody import deluge.common import deluge.component as component -from deluge import path_chooser_common +from deluge import metafile, path_chooser_common from deluge._libtorrent import LT_VERSION, lt from deluge.configmanager import ConfigManager, get_config_dir from deluge.core.alertmanager import AlertManager @@ -998,7 +998,11 @@ class Core(component.Component): created_by=None, trackers=None, add_to_session=False, + torrent_format=metafile.TorrentFormat.V1, ): + if isinstance(torrent_format, str): + torrent_format = metafile.TorrentFormat(torrent_format) + log.debug('creating torrent..') return threads.deferToThread( self._create_torrent_thread, @@ -1012,6 +1016,7 @@ class Core(component.Component): created_by=created_by, trackers=trackers, add_to_session=add_to_session, + torrent_format=torrent_format, ) def _create_torrent_thread( @@ -1026,6 +1031,7 @@ class Core(component.Component): created_by, trackers, add_to_session, + torrent_format, ): from deluge import metafile @@ -1038,6 +1044,7 @@ class Core(component.Component): private=private, created_by=created_by, trackers=trackers, + torrent_format=torrent_format, ) write_file = False diff --git a/deluge/metafile.py b/deluge/metafile.py index 906cc81e3..81a371ff3 100644 --- a/deluge/metafile.py +++ b/deluge/metafile.py @@ -10,10 +10,13 @@ # See LICENSE for more details. 
# +import copy import logging import os.path import time +from enum import Enum from hashlib import sha1 as sha +from hashlib import sha256 import deluge.component as component from deluge.bencode import bencode @@ -41,6 +44,35 @@ def dummy(*v): pass +class TorrentFormat(str, Enum): + V1 = 'v1' + V2 = 'v2' + HYBRID = 'hybrid' + + @classmethod + def _missing_(cls, value): + if not value: + return None + + value = value.lower() + for member in cls: + if member.value == value: + return member + + def to_lt_flag(self): + if self.value == 'v1': + return 64 + if self.value == 'v2': + return 32 + return 0 + + def includes_v1(self): + return self == self.__class__.V1 or self == self.__class__.HYBRID + + def includes_v2(self): + return self == self.__class__.V2 or self == self.__class__.HYBRID + + class RemoteFileProgress: def __init__(self, session_id): self.session_id = session_id @@ -65,6 +97,7 @@ def make_meta_file_content( private=False, created_by=None, trackers=None, + torrent_format=TorrentFormat.V1, ): data = {'creation date': int(gmtime())} if url: @@ -80,10 +113,20 @@ def make_meta_file_content( if session_id: progress = RemoteFileProgress(session_id) - info = makeinfo(path, piece_length, progress, name, content_type, private) + info, piece_layers = makeinfo( + path, + piece_length, + progress, + name, + content_type, + private, + torrent_format, + ) # check_info(info) data['info'] = info + if piece_layers is not None: + data['piece layers'] = piece_layers if title: data['title'] = title.encode('utf8') if comment: @@ -170,101 +213,237 @@ def calcsize(path): return total -def makeinfo(path, piece_length, progress, name=None, content_type=None, private=False): +def _next_pow2(num): + import math + + if not num: + return 1 + return 2 ** math.ceil(math.log2(num)) + + +def _sha256_merkle_root(leafs, nb_leafs, padding, in_place=True) -> bytes: + """ + Build the root of the merkle hash tree from the (possibly incomplete) leafs layer. 
+ If len(leafs) < nb_leafs, it will be padded with the padding repeated as many times + as needed to have nb_leafs in total. + """ + if not in_place: + leafs = copy.copy(leafs) + + while nb_leafs > 1: + nb_leafs = nb_leafs // 2 + for i in range(nb_leafs): + node1 = leafs[2 * i] if 2 * i < len(leafs) else padding + node2 = leafs[2 * i + 1] if 2 * i + 1 < len(leafs) else padding + h = sha256(node1) + h.update(node2) + if i < len(leafs): + leafs[i] = h.digest() + else: + leafs.append(h.digest()) + return leafs[0] if leafs else padding + + +def _sha256_buffer_blocks(buffer, block_len): + import math + + nb_blocks = math.ceil(len(buffer) / block_len) + blocks = [ + sha256(buffer[i * block_len : (i + 1) * block_len]).digest() + for i in range(nb_blocks) + ] + return blocks + + +def makeinfo_lt( + path, piece_length, name=None, private=False, torrent_format=TorrentFormat.V1 +): + """ + Make info using via the libtorrent library. + """ + from deluge._libtorrent import lt + + if not name: + name = os.path.split(path)[1] + + fs = lt.file_storage() + if os.path.isfile(path): + lt.add_files(fs, path) + else: + for p, f in subfiles(path): + fs.add_file(os.path.join(name, *p), os.path.getsize(f)) + torrent = lt.create_torrent( + fs, piece_size=piece_length, flags=torrent_format.to_lt_flag() + ) + + lt.set_piece_hashes(torrent, os.path.dirname(path)) + torrent.set_priv(private) + + t = torrent.generate() + info = t[b'info'] + pieces_layers = t.get(b'piece layers', None) + + return info, pieces_layers + + +def makeinfo( + path, + piece_length, + progress, + name=None, + content_type=None, + private=False, + torrent_format=TorrentFormat.V1, +): # HEREDAVE. If path is directory, how do we assign content type? 
+ + v2_block_len = 2**14 # 16 KiB + v2_blocks_per_piece = 1 + v2_block_padding = b'' + v2_piece_padding = b'' + if torrent_format.includes_v2(): + if _next_pow2(piece_length) != piece_length or piece_length < v2_block_len: + raise ValueError( + 'Bittorrent v2 piece size must be a power of 2; and bigger than 16 KiB' + ) + + v2_blocks_per_piece = piece_length // v2_block_len + v2_block_padding = bytes(32) # 32 = size of sha256 in bytes + v2_piece_padding = _sha256_merkle_root( + [], nb_leafs=v2_blocks_per_piece, padding=v2_block_padding + ) + path = os.path.abspath(path) - piece_count = 0 + files = [] + pieces = [] + file_tree = {} + piece_layers = {} if os.path.isdir(path): - subs = sorted(subfiles(path)) - pieces = [] - sh = sha() - done = 0 - fs = [] + if not name: + name = os.path.split(path)[1] + subs = subfiles(path) + if torrent_format.includes_v2(): + subs = sorted(subs) + length = None totalsize = 0.0 - totalhashed = 0 for p, f in subs: totalsize += os.path.getsize(f) - if totalsize >= piece_length: - import math - - num_pieces = math.ceil(totalsize / piece_length) + else: + name = os.path.split(path)[1] + subs = [([name], path)] + length = os.path.getsize(path) + totalsize = length + is_multi_file = len(subs) > 1 + sh = sha() + done = 0 + totalhashed = 0 + + next_progress_event = piece_length + for p, f in subs: + file_pieces_v2 = [] + pos = 0 + size = os.path.getsize(f) + p2 = [n.encode('utf8') for n in p] + if content_type: + files.append( + {b'length': size, b'path': p2, b'content_type': content_type} + ) # HEREDAVE. bad for batch! else: - num_pieces = 1 - - for p, f in subs: - pos = 0 - size = os.path.getsize(f) - p2 = [n.encode('utf8') for n in p] - if content_type: - fs.append( - {'length': size, 'path': p2, 'content_type': content_type} - ) # HEREDAVE. bad for batch! 
- else: - fs.append({'length': size, 'path': p2}) - with open(f, 'rb') as file_: - while pos < size: - a = min(size - pos, piece_length - done) - sh.update(file_.read(a)) - done += a - pos += a - totalhashed += a - - if done == piece_length: - pieces.append(sh.digest()) - piece_count += 1 - done = 0 - sh = sha() - progress(piece_count, num_pieces) - if done > 0: + files.append({b'length': size, b'path': p2}) + with open(f, 'rb') as file_: + while pos < size: + to_read = min(size - pos, piece_length) + buffer = memoryview(file_.read(to_read)) + pos += to_read + + if torrent_format.includes_v1(): + a = piece_length - done + for sub_buffer in (buffer[:a], buffer[a:]): + if sub_buffer: + sh.update(sub_buffer) + done += len(sub_buffer) + + if done == piece_length: + pieces.append(sh.digest()) + done = 0 + sh = sha() + if torrent_format.includes_v2(): + block_hashes = _sha256_buffer_blocks(buffer, v2_block_len) + num_leafs = v2_blocks_per_piece + if size <= piece_length: + # The special case when the file is smaller than a piece: only pad till the next power of 2 + num_leafs = _next_pow2(len(block_hashes)) + root = _sha256_merkle_root( + block_hashes, num_leafs, v2_block_padding, in_place=True + ) + file_pieces_v2.append(root) + + totalhashed += to_read + if totalhashed >= next_progress_event: + next_progress_event = totalhashed + piece_length + progress(totalhashed, totalsize) + + if torrent_format == TorrentFormat.HYBRID and is_multi_file and done > 0: + # Add padding file to force piece-alignment + padding = piece_length - done + sh.update(bytes(padding)) + files.append( + { + b'length': padding, + b'attr': b'p', + b'path': [b'.pad', str(padding).encode()], + } + ) pieces.append(sh.digest()) - piece_count += 1 - progress(piece_count, num_pieces) - - if not name: - name = os.path.split(path)[1] - - return { - 'pieces': b''.join(pieces), - 'piece length': piece_length, - 'files': fs, - 'name': name.encode('utf8'), - 'private': private, - } - else: - size = 
os.path.getsize(path) - if size >= piece_length: - num_pieces = size // piece_length + done = 0 + sh = sha() + + if torrent_format.includes_v2(): + # add file to the `file tree` and, if needed, to the `piece layers` structures + pieces_root = _sha256_merkle_root( + file_pieces_v2, + _next_pow2(len(file_pieces_v2)), + v2_piece_padding, + in_place=False, + ) + dst_directory = file_tree + for directory in p2[:-1]: + dst_directory = dst_directory.setdefault(directory, {}) + dst_directory[p2[-1]] = { + b'': { + b'length': size, + b'pieces root': pieces_root, + } + } + if len(file_pieces_v2) > 1: + piece_layers[pieces_root] = b''.join(file_pieces_v2) + + if done > 0: + pieces.append(sh.digest()) + progress(totalsize, totalsize) + + info = { + b'piece length': piece_length, + b'name': name.encode('utf8'), + } + if private: + info[b'private'] = 1 + if content_type: + info[b'content_type'] = content_type + if torrent_format.includes_v1(): + info[b'pieces'] = b''.join(pieces) + if is_multi_file: + info[b'files'] = files else: - num_pieces = 1 - - pieces = [] - p = 0 - with open(path, 'rb') as _file: - while p < size: - x = _file.read(min(piece_length, size - p)) - pieces.append(sha(x).digest()) - piece_count += 1 - p += piece_length - if p > size: - p = size - progress(piece_count, num_pieces) - name = os.path.split(path)[1].encode('utf8') - if content_type is not None: - return { - 'pieces': b''.join(pieces), - 'piece length': piece_length, - 'length': size, - 'name': name, - 'content_type': content_type, - 'private': private, + info[b'length'] = length + if torrent_format.includes_v2(): + info.update( + { + b'meta version': 2, + b'file tree': file_tree, } - return { - 'pieces': b''.join(pieces), - 'piece length': piece_length, - 'length': size, - 'name': name, - 'private': private, - } + ) + return info, piece_layers if torrent_format.includes_v2() else None def subfiles(d): diff --git a/deluge/tests/test_metafile.py b/deluge/tests/test_metafile.py index 
fda1cb73e..1b1675052 100644 --- a/deluge/tests/test_metafile.py +++ b/deluge/tests/test_metafile.py @@ -7,7 +7,13 @@ import os import tempfile +import pytest + from deluge import metafile +from deluge._libtorrent import LT_VERSION +from deluge.common import VersionSplit + +from . import common def check_torrent(filename): @@ -55,3 +61,52 @@ class TestMetafile: metafile.make_meta_file(tmp_data, '', 32768, target=tmp_torrent) check_torrent(tmp_torrent) + + @pytest.mark.parametrize( + 'path', + [ + common.get_test_data_file('deluge.png'), + common.get_test_data_file('unicode_filenames.torrent'), + os.path.dirname(common.get_test_data_file('deluge.png')), + ], + ) + @pytest.mark.parametrize( + 'torrent_format', + [ + metafile.TorrentFormat.V1, + metafile.TorrentFormat.V2, + metafile.TorrentFormat.HYBRID, + ], + ) + @pytest.mark.parametrize('piece_length', [2**14, 2**15, 2**16]) + @pytest.mark.parametrize('private', [True, False]) + def test_create_info(self, path, torrent_format, piece_length, private): + our_info, our_piece_layers = metafile.makeinfo( + path, + piece_length, + metafile.dummy, + private=private, + torrent_format=torrent_format, + ) + lt_info, lt_piece_layers = metafile.makeinfo_lt( + path, + piece_length, + private=private, + torrent_format=torrent_format, + ) + + if ( + torrent_format == metafile.TorrentFormat.HYBRID + and os.path.isdir(path) + and VersionSplit(LT_VERSION) <= VersionSplit('2.0.7.0') + ): + # Libtorrent didn't correctly follow the standard until version 2.0.7 included + # https://github.com/arvidn/libtorrent/commit/74d82a0cd7c2e9e3c4294901d7eb65e247050df4 + # If last file is a padding, ignore that file and the last piece. + if our_info[b'files'][-1][b'path'][0] == b'.pad': + our_info[b'files'] = our_info[b'files'][:-1] + our_info[b'pieces'] = our_info[b'pieces'][:-32] + lt_info[b'pieces'] = lt_info[b'pieces'][:-32] + + assert our_info == lt_info + assert our_piece_layers == lt_piece_layers |