summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Radu Carpa <radu.carpa@cern.ch> 2023-08-03 09:15:23 +0200
committer Calum Lind <calumlind+deluge@gmail.com> 2023-11-20 11:05:39 +0100
commit 1751d62df9dc41adcd3caff2bb6a0c0d824a4fc5 (patch)
tree 99cc405c41828d29424d63ad35bd6c605e19d215
parent [Core] Make create_torrent return a deferred (diff)
download deluge-1751d62df9dc41adcd3caff2bb6a0c0d824a4fc5.tar.xz
deluge-1751d62df9dc41adcd3caff2bb6a0c0d824a4fc5.zip
[Core] Support creating v2 torrents
Add support for v2 torrents in create_torrent, but keep the old default of only adding the v1 metadata. Unify the single-file and directory cases to avoid code duplication. V2 torrents require files to be piece-aligned, and so do hybrid v1/v2 ones. To handle both piece-aligned and non-aligned files, always read the files in piece-aligned chunks and re-slice the buffer if needed (for v1-only multi-file torrents). The progress event also had to be adapted: it now depends on the number of bytes hashed rather than the number of pieces. To avoid sending an excessive number of events when handling a directory with many small files, add a mechanism that limits progress events to at most one per piece_length bytes hashed. Closes: https://github.com/deluge-torrent/deluge/pull/430
-rw-r--r--deluge/core/core.py9
-rw-r--r--deluge/metafile.py351
-rw-r--r--deluge/tests/test_metafile.py55
3 files changed, 328 insertions, 87 deletions
diff --git a/deluge/core/core.py b/deluge/core/core.py
index fc12f7bc5..e2130f595 100644
--- a/deluge/core/core.py
+++ b/deluge/core/core.py
@@ -21,7 +21,7 @@ from twisted.web.client import Agent, readBody
import deluge.common
import deluge.component as component
-from deluge import path_chooser_common
+from deluge import metafile, path_chooser_common
from deluge._libtorrent import LT_VERSION, lt
from deluge.configmanager import ConfigManager, get_config_dir
from deluge.core.alertmanager import AlertManager
@@ -998,7 +998,11 @@ class Core(component.Component):
created_by=None,
trackers=None,
add_to_session=False,
+ torrent_format=metafile.TorrentFormat.V1,
):
+ if isinstance(torrent_format, str):
+ torrent_format = metafile.TorrentFormat(torrent_format)
+
log.debug('creating torrent..')
return threads.deferToThread(
self._create_torrent_thread,
@@ -1012,6 +1016,7 @@ class Core(component.Component):
created_by=created_by,
trackers=trackers,
add_to_session=add_to_session,
+ torrent_format=torrent_format,
)
def _create_torrent_thread(
@@ -1026,6 +1031,7 @@ class Core(component.Component):
created_by,
trackers,
add_to_session,
+ torrent_format,
):
from deluge import metafile
@@ -1038,6 +1044,7 @@ class Core(component.Component):
private=private,
created_by=created_by,
trackers=trackers,
+ torrent_format=torrent_format,
)
write_file = False
diff --git a/deluge/metafile.py b/deluge/metafile.py
index 906cc81e3..81a371ff3 100644
--- a/deluge/metafile.py
+++ b/deluge/metafile.py
@@ -10,10 +10,13 @@
# See LICENSE for more details.
#
+import copy
import logging
import os.path
import time
+from enum import Enum
from hashlib import sha1 as sha
+from hashlib import sha256
import deluge.component as component
from deluge.bencode import bencode
@@ -41,6 +44,35 @@ def dummy(*v):
pass
+class TorrentFormat(str, Enum):
+ V1 = 'v1'
+ V2 = 'v2'
+ HYBRID = 'hybrid'
+
+ @classmethod
+ def _missing_(cls, value):
+ if not value:
+ return None
+
+ value = value.lower()
+ for member in cls:
+ if member.value == value:
+ return member
+
+ def to_lt_flag(self):
+ if self.value == 'v1':
+ return 64
+ if self.value == 'v2':
+ return 32
+ return 0
+
+ def includes_v1(self):
+ return self == self.__class__.V1 or self == self.__class__.HYBRID
+
+ def includes_v2(self):
+ return self == self.__class__.V2 or self == self.__class__.HYBRID
+
+
class RemoteFileProgress:
def __init__(self, session_id):
self.session_id = session_id
@@ -65,6 +97,7 @@ def make_meta_file_content(
private=False,
created_by=None,
trackers=None,
+ torrent_format=TorrentFormat.V1,
):
data = {'creation date': int(gmtime())}
if url:
@@ -80,10 +113,20 @@ def make_meta_file_content(
if session_id:
progress = RemoteFileProgress(session_id)
- info = makeinfo(path, piece_length, progress, name, content_type, private)
+ info, piece_layers = makeinfo(
+ path,
+ piece_length,
+ progress,
+ name,
+ content_type,
+ private,
+ torrent_format,
+ )
# check_info(info)
data['info'] = info
+ if piece_layers is not None:
+ data['piece layers'] = piece_layers
if title:
data['title'] = title.encode('utf8')
if comment:
@@ -170,101 +213,237 @@ def calcsize(path):
return total
-def makeinfo(path, piece_length, progress, name=None, content_type=None, private=False):
+def _next_pow2(num):
+ import math
+
+ if not num:
+ return 1
+ return 2 ** math.ceil(math.log2(num))
+
+
+def _sha256_merkle_root(leafs, nb_leafs, padding, in_place=True) -> bytes:
+ """
+ Build the root of the merkle hash tree from the (possibly incomplete) leafs layer.
+ If len(leafs) < nb_leafs, it will be padded with the padding repeated as many times
+ as needed to have nb_leafs in total.
+ """
+ if not in_place:
+ leafs = copy.copy(leafs)
+
+ while nb_leafs > 1:
+ nb_leafs = nb_leafs // 2
+ for i in range(nb_leafs):
+ node1 = leafs[2 * i] if 2 * i < len(leafs) else padding
+ node2 = leafs[2 * i + 1] if 2 * i + 1 < len(leafs) else padding
+ h = sha256(node1)
+ h.update(node2)
+ if i < len(leafs):
+ leafs[i] = h.digest()
+ else:
+ leafs.append(h.digest())
+ return leafs[0] if leafs else padding
+
+
+def _sha256_buffer_blocks(buffer, block_len):
+ import math
+
+ nb_blocks = math.ceil(len(buffer) / block_len)
+ blocks = [
+ sha256(buffer[i * block_len : (i + 1) * block_len]).digest()
+ for i in range(nb_blocks)
+ ]
+ return blocks
+
+
+def makeinfo_lt(
+ path, piece_length, name=None, private=False, torrent_format=TorrentFormat.V1
+):
+ """
+ Make info via the libtorrent library.
+ """
+ from deluge._libtorrent import lt
+
+ if not name:
+ name = os.path.split(path)[1]
+
+ fs = lt.file_storage()
+ if os.path.isfile(path):
+ lt.add_files(fs, path)
+ else:
+ for p, f in subfiles(path):
+ fs.add_file(os.path.join(name, *p), os.path.getsize(f))
+ torrent = lt.create_torrent(
+ fs, piece_size=piece_length, flags=torrent_format.to_lt_flag()
+ )
+
+ lt.set_piece_hashes(torrent, os.path.dirname(path))
+ torrent.set_priv(private)
+
+ t = torrent.generate()
+ info = t[b'info']
+ pieces_layers = t.get(b'piece layers', None)
+
+ return info, pieces_layers
+
+
+def makeinfo(
+ path,
+ piece_length,
+ progress,
+ name=None,
+ content_type=None,
+ private=False,
+ torrent_format=TorrentFormat.V1,
+):
# HEREDAVE. If path is directory, how do we assign content type?
+
+ v2_block_len = 2**14 # 16 KiB
+ v2_blocks_per_piece = 1
+ v2_block_padding = b''
+ v2_piece_padding = b''
+ if torrent_format.includes_v2():
+ if _next_pow2(piece_length) != piece_length or piece_length < v2_block_len:
+ raise ValueError(
+ 'Bittorrent v2 piece size must be a power of 2; and bigger than 16 KiB'
+ )
+
+ v2_blocks_per_piece = piece_length // v2_block_len
+ v2_block_padding = bytes(32) # 32 = size of sha256 in bytes
+ v2_piece_padding = _sha256_merkle_root(
+ [], nb_leafs=v2_blocks_per_piece, padding=v2_block_padding
+ )
+
path = os.path.abspath(path)
- piece_count = 0
+ files = []
+ pieces = []
+ file_tree = {}
+ piece_layers = {}
if os.path.isdir(path):
- subs = sorted(subfiles(path))
- pieces = []
- sh = sha()
- done = 0
- fs = []
+ if not name:
+ name = os.path.split(path)[1]
+ subs = subfiles(path)
+ if torrent_format.includes_v2():
+ subs = sorted(subs)
+ length = None
totalsize = 0.0
- totalhashed = 0
for p, f in subs:
totalsize += os.path.getsize(f)
- if totalsize >= piece_length:
- import math
-
- num_pieces = math.ceil(totalsize / piece_length)
+ else:
+ name = os.path.split(path)[1]
+ subs = [([name], path)]
+ length = os.path.getsize(path)
+ totalsize = length
+ is_multi_file = len(subs) > 1
+ sh = sha()
+ done = 0
+ totalhashed = 0
+
+ next_progress_event = piece_length
+ for p, f in subs:
+ file_pieces_v2 = []
+ pos = 0
+ size = os.path.getsize(f)
+ p2 = [n.encode('utf8') for n in p]
+ if content_type:
+ files.append(
+ {b'length': size, b'path': p2, b'content_type': content_type}
+ ) # HEREDAVE. bad for batch!
else:
- num_pieces = 1
-
- for p, f in subs:
- pos = 0
- size = os.path.getsize(f)
- p2 = [n.encode('utf8') for n in p]
- if content_type:
- fs.append(
- {'length': size, 'path': p2, 'content_type': content_type}
- ) # HEREDAVE. bad for batch!
- else:
- fs.append({'length': size, 'path': p2})
- with open(f, 'rb') as file_:
- while pos < size:
- a = min(size - pos, piece_length - done)
- sh.update(file_.read(a))
- done += a
- pos += a
- totalhashed += a
-
- if done == piece_length:
- pieces.append(sh.digest())
- piece_count += 1
- done = 0
- sh = sha()
- progress(piece_count, num_pieces)
- if done > 0:
+ files.append({b'length': size, b'path': p2})
+ with open(f, 'rb') as file_:
+ while pos < size:
+ to_read = min(size - pos, piece_length)
+ buffer = memoryview(file_.read(to_read))
+ pos += to_read
+
+ if torrent_format.includes_v1():
+ a = piece_length - done
+ for sub_buffer in (buffer[:a], buffer[a:]):
+ if sub_buffer:
+ sh.update(sub_buffer)
+ done += len(sub_buffer)
+
+ if done == piece_length:
+ pieces.append(sh.digest())
+ done = 0
+ sh = sha()
+ if torrent_format.includes_v2():
+ block_hashes = _sha256_buffer_blocks(buffer, v2_block_len)
+ num_leafs = v2_blocks_per_piece
+ if size <= piece_length:
+ # Special case: the file fits within a single piece, so only pad up to the next power of 2
+ num_leafs = _next_pow2(len(block_hashes))
+ root = _sha256_merkle_root(
+ block_hashes, num_leafs, v2_block_padding, in_place=True
+ )
+ file_pieces_v2.append(root)
+
+ totalhashed += to_read
+ if totalhashed >= next_progress_event:
+ next_progress_event = totalhashed + piece_length
+ progress(totalhashed, totalsize)
+
+ if torrent_format == TorrentFormat.HYBRID and is_multi_file and done > 0:
+ # Add padding file to force piece-alignment
+ padding = piece_length - done
+ sh.update(bytes(padding))
+ files.append(
+ {
+ b'length': padding,
+ b'attr': b'p',
+ b'path': [b'.pad', str(padding).encode()],
+ }
+ )
pieces.append(sh.digest())
- piece_count += 1
- progress(piece_count, num_pieces)
-
- if not name:
- name = os.path.split(path)[1]
-
- return {
- 'pieces': b''.join(pieces),
- 'piece length': piece_length,
- 'files': fs,
- 'name': name.encode('utf8'),
- 'private': private,
- }
- else:
- size = os.path.getsize(path)
- if size >= piece_length:
- num_pieces = size // piece_length
+ done = 0
+ sh = sha()
+
+ if torrent_format.includes_v2():
+ # add file to the `file tree` and, if needed, to the `piece layers` structures
+ pieces_root = _sha256_merkle_root(
+ file_pieces_v2,
+ _next_pow2(len(file_pieces_v2)),
+ v2_piece_padding,
+ in_place=False,
+ )
+ dst_directory = file_tree
+ for directory in p2[:-1]:
+ dst_directory = dst_directory.setdefault(directory, {})
+ dst_directory[p2[-1]] = {
+ b'': {
+ b'length': size,
+ b'pieces root': pieces_root,
+ }
+ }
+ if len(file_pieces_v2) > 1:
+ piece_layers[pieces_root] = b''.join(file_pieces_v2)
+
+ if done > 0:
+ pieces.append(sh.digest())
+ progress(totalsize, totalsize)
+
+ info = {
+ b'piece length': piece_length,
+ b'name': name.encode('utf8'),
+ }
+ if private:
+ info[b'private'] = 1
+ if content_type:
+ info[b'content_type'] = content_type
+ if torrent_format.includes_v1():
+ info[b'pieces'] = b''.join(pieces)
+ if is_multi_file:
+ info[b'files'] = files
else:
- num_pieces = 1
-
- pieces = []
- p = 0
- with open(path, 'rb') as _file:
- while p < size:
- x = _file.read(min(piece_length, size - p))
- pieces.append(sha(x).digest())
- piece_count += 1
- p += piece_length
- if p > size:
- p = size
- progress(piece_count, num_pieces)
- name = os.path.split(path)[1].encode('utf8')
- if content_type is not None:
- return {
- 'pieces': b''.join(pieces),
- 'piece length': piece_length,
- 'length': size,
- 'name': name,
- 'content_type': content_type,
- 'private': private,
+ info[b'length'] = length
+ if torrent_format.includes_v2():
+ info.update(
+ {
+ b'meta version': 2,
+ b'file tree': file_tree,
}
- return {
- 'pieces': b''.join(pieces),
- 'piece length': piece_length,
- 'length': size,
- 'name': name,
- 'private': private,
- }
+ )
+ return info, piece_layers if torrent_format.includes_v2() else None
def subfiles(d):
diff --git a/deluge/tests/test_metafile.py b/deluge/tests/test_metafile.py
index fda1cb73e..1b1675052 100644
--- a/deluge/tests/test_metafile.py
+++ b/deluge/tests/test_metafile.py
@@ -7,7 +7,13 @@
import os
import tempfile
+import pytest
+
from deluge import metafile
+from deluge._libtorrent import LT_VERSION
+from deluge.common import VersionSplit
+
+from . import common
def check_torrent(filename):
@@ -55,3 +61,52 @@ class TestMetafile:
metafile.make_meta_file(tmp_data, '', 32768, target=tmp_torrent)
check_torrent(tmp_torrent)
+
+ @pytest.mark.parametrize(
+ 'path',
+ [
+ common.get_test_data_file('deluge.png'),
+ common.get_test_data_file('unicode_filenames.torrent'),
+ os.path.dirname(common.get_test_data_file('deluge.png')),
+ ],
+ )
+ @pytest.mark.parametrize(
+ 'torrent_format',
+ [
+ metafile.TorrentFormat.V1,
+ metafile.TorrentFormat.V2,
+ metafile.TorrentFormat.HYBRID,
+ ],
+ )
+ @pytest.mark.parametrize('piece_length', [2**14, 2**15, 2**16])
+ @pytest.mark.parametrize('private', [True, False])
+ def test_create_info(self, path, torrent_format, piece_length, private):
+ our_info, our_piece_layers = metafile.makeinfo(
+ path,
+ piece_length,
+ metafile.dummy,
+ private=private,
+ torrent_format=torrent_format,
+ )
+ lt_info, lt_piece_layers = metafile.makeinfo_lt(
+ path,
+ piece_length,
+ private=private,
+ torrent_format=torrent_format,
+ )
+
+ if (
+ torrent_format == metafile.TorrentFormat.HYBRID
+ and os.path.isdir(path)
+ and VersionSplit(LT_VERSION) <= VersionSplit('2.0.7.0')
+ ):
+ # Libtorrent didn't correctly follow the standard until version 2.0.7 included
+ # https://github.com/arvidn/libtorrent/commit/74d82a0cd7c2e9e3c4294901d7eb65e247050df4
+ # If last file is a padding, ignore that file and the last piece.
+ if our_info[b'files'][-1][b'path'][0] == b'.pad':
+ our_info[b'files'] = our_info[b'files'][:-1]
+ our_info[b'pieces'] = our_info[b'pieces'][:-32]
+ lt_info[b'pieces'] = lt_info[b'pieces'][:-32]
+
+ assert our_info == lt_info
+ assert our_piece_layers == lt_piece_layers