diff options
author | Junio C Hamano <gitster@pobox.com> | 2015-10-16 00:43:52 +0200 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2015-10-16 00:43:53 +0200 |
commit | 6ff518f593729a39f1314c9c05e3758f5ec19162 (patch) | |
tree | b49163899b0ad9e9e56b662cf4b2ab6046dc107c /git-p4.py | |
parent | Merge branch 'js/gc-with-stale-symref' (diff) | |
parent | git-p4: add Git LFS backend for large file system (diff) | |
download | git-6ff518f593729a39f1314c9c05e3758f5ec19162.tar.xz git-6ff518f593729a39f1314c9c05e3758f5ec19162.zip |
Merge branch 'ls/p4-lfs'
Teach "git p4" to send large blobs outside the repository by
talking to Git LFS.
* ls/p4-lfs:
git-p4: add Git LFS backend for large file system
git-p4: add support for large file systems
git-p4: check free space during streaming
git-p4: add file streaming progress in verbose mode
git-p4: return an empty list if a list config has no values
git-p4: add gitConfigInt reader
git-p4: add optional type specifier to gitConfig reader
Diffstat (limited to 'git-p4.py')
-rwxr-xr-x | git-p4.py | 270 |
1 files changed, 254 insertions, 16 deletions
@@ -22,6 +22,8 @@ import platform import re import shutil import stat +import zipfile +import zlib try: from subprocess import CalledProcessError @@ -104,6 +106,16 @@ def chdir(path, is_client_path=False): path = os.getcwd() os.environ['PWD'] = path +def calcDiskFree(): + """Return free space in bytes on the disk of the given dirname.""" + if platform.system() == 'Windows': + free_bytes = ctypes.c_ulonglong(0) + ctypes.windll.kernel32.GetDiskFreeSpaceExW(ctypes.c_wchar_p(os.getcwd()), None, None, ctypes.pointer(free_bytes)) + return free_bytes.value + else: + st = os.statvfs(os.getcwd()) + return st.f_bavail * st.f_frsize + def die(msg): if verbose: raise Exception(msg) @@ -602,9 +614,12 @@ def gitBranchExists(branch): _gitConfig = {} -def gitConfig(key): +def gitConfig(key, typeSpecifier=None): if not _gitConfig.has_key(key): - cmd = [ "git", "config", key ] + cmd = [ "git", "config" ] + if typeSpecifier: + cmd += [ typeSpecifier ] + cmd += [ key ] s = read_pipe(cmd, ignore_error=True) _gitConfig[key] = s.strip() return _gitConfig[key] @@ -615,16 +630,26 @@ def gitConfigBool(key): in the config.""" if not _gitConfig.has_key(key): - cmd = [ "git", "config", "--bool", key ] + _gitConfig[key] = gitConfig(key, '--bool') == "true" + return _gitConfig[key] + +def gitConfigInt(key): + if not _gitConfig.has_key(key): + cmd = [ "git", "config", "--int", key ] s = read_pipe(cmd, ignore_error=True) v = s.strip() - _gitConfig[key] = v == "true" + try: + _gitConfig[key] = int(gitConfig(key, '--int')) + except ValueError: + _gitConfig[key] = None return _gitConfig[key] def gitConfigList(key): if not _gitConfig.has_key(key): s = read_pipe(["git", "config", "--get-all", key], ignore_error=True) _gitConfig[key] = s.strip().split(os.linesep) + if _gitConfig[key] == ['']: + _gitConfig[key] = [] return _gitConfig[key] def p4BranchesInGit(branchesAreInRemotes=True): @@ -907,6 +932,182 @@ def wildcard_present(path): m = re.search("[*#@%]", path) return m is not None +class LargeFileSystem(object): + """Base class for large file system support.""" + + def __init__(self, writeToGitStream): + self.largeFiles = set() + self.writeToGitStream = writeToGitStream + + def generatePointer(self, cloneDestination, contentFile): + """Return the content of a pointer file that is stored in Git instead of + the actual content.""" + assert False, "Method 'generatePointer' required in " + self.__class__.__name__ + + def pushFile(self, localLargeFile): + """Push the actual content which is not stored in the Git repository to + a server.""" + assert False, "Method 'pushFile' required in " + self.__class__.__name__ + + def hasLargeFileExtension(self, relPath): + return reduce( + lambda a, b: a or b, + [relPath.endswith('.' + e) for e in gitConfigList('git-p4.largeFileExtensions')], + False + ) + + def generateTempFile(self, contents): + contentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False) + for d in contents: + contentFile.write(d) + contentFile.close() + return contentFile.name + + def exceedsLargeFileThreshold(self, relPath, contents): + if gitConfigInt('git-p4.largeFileThreshold'): + contentsSize = sum(len(d) for d in contents) + if contentsSize > gitConfigInt('git-p4.largeFileThreshold'): + return True + if gitConfigInt('git-p4.largeFileCompressedThreshold'): + contentsSize = sum(len(d) for d in contents) + if contentsSize <= gitConfigInt('git-p4.largeFileCompressedThreshold'): + return False + contentTempFile = self.generateTempFile(contents) + compressedContentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False) + zf = zipfile.ZipFile(compressedContentFile.name, mode='w') + zf.write(contentTempFile, compress_type=zipfile.ZIP_DEFLATED) + zf.close() + compressedContentsSize = zf.infolist()[0].compress_size + os.remove(contentTempFile) + os.remove(compressedContentFile.name) + if compressedContentsSize > gitConfigInt('git-p4.largeFileCompressedThreshold'): + return True + return False + + def addLargeFile(self, relPath): + self.largeFiles.add(relPath) + + def removeLargeFile(self, relPath): + self.largeFiles.remove(relPath) + + def isLargeFile(self, relPath): + return relPath in self.largeFiles + + def processContent(self, git_mode, relPath, contents): + """Processes the content of git fast import. This method decides if a + file is stored in the large file system and handles all necessary + steps.""" + if self.exceedsLargeFileThreshold(relPath, contents) or self.hasLargeFileExtension(relPath): + contentTempFile = self.generateTempFile(contents) + (git_mode, contents, localLargeFile) = self.generatePointer(contentTempFile) + + # Move temp file to final location in large file system + largeFileDir = os.path.dirname(localLargeFile) + if not os.path.isdir(largeFileDir): + os.makedirs(largeFileDir) + shutil.move(contentTempFile, localLargeFile) + self.addLargeFile(relPath) + if gitConfigBool('git-p4.largeFilePush'): + self.pushFile(localLargeFile) + if verbose: + sys.stderr.write("%s moved to large file system (%s)\n" % (relPath, localLargeFile)) + return (git_mode, contents) + +class MockLFS(LargeFileSystem): + """Mock large file system for testing.""" + + def generatePointer(self, contentFile): + """The pointer content is the original content prefixed with "pointer-". + The local filename of the large file storage is derived from the file content. + """ + with open(contentFile, 'r') as f: + content = next(f) + gitMode = '100644' + pointerContents = 'pointer-' + content + localLargeFile = os.path.join(os.getcwd(), '.git', 'mock-storage', 'local', content[:-1]) + return (gitMode, pointerContents, localLargeFile) + + def pushFile(self, localLargeFile): + """The remote filename of the large file storage is the same as the local + one but in a different directory. + """ + remotePath = os.path.join(os.path.dirname(localLargeFile), '..', 'remote') + if not os.path.exists(remotePath): + os.makedirs(remotePath) + shutil.copyfile(localLargeFile, os.path.join(remotePath, os.path.basename(localLargeFile))) + +class GitLFS(LargeFileSystem): + """Git LFS as backend for the git-p4 large file system. + See https://git-lfs.github.com/ for details.""" + + def __init__(self, *args): + LargeFileSystem.__init__(self, *args) + self.baseGitAttributes = [] + + def generatePointer(self, contentFile): + """Generate a Git LFS pointer for the content. Return LFS Pointer file + mode and content which is stored in the Git repository instead of + the actual content. Return also the new location of the actual + content. + """ + pointerProcess = subprocess.Popen( + ['git', 'lfs', 'pointer', '--file=' + contentFile], + stdout=subprocess.PIPE + ) + pointerFile = pointerProcess.stdout.read() + if pointerProcess.wait(): + os.remove(contentFile) + die('git-lfs pointer command failed. Did you install the extension?') + pointerContents = [i+'\n' for i in pointerFile.split('\n')[2:][:-1]] + oid = pointerContents[1].split(' ')[1].split(':')[1][:-1] + localLargeFile = os.path.join( + os.getcwd(), + '.git', 'lfs', 'objects', oid[:2], oid[2:4], + oid, + ) + # LFS Spec states that pointer files should not have the executable bit set. + gitMode = '100644' + return (gitMode, pointerContents, localLargeFile) + + def pushFile(self, localLargeFile): + uploadProcess = subprocess.Popen( + ['git', 'lfs', 'push', '--object-id', 'origin', os.path.basename(localLargeFile)] + ) + if uploadProcess.wait(): + die('git-lfs push command failed. Did you define a remote?') + + def generateGitAttributes(self): + return ( + self.baseGitAttributes + + [ + '\n', + '#\n', + '# Git LFS (see https://git-lfs.github.com/)\n', + '#\n', + ] + + ['*.' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n' + for f in sorted(gitConfigList('git-p4.largeFileExtensions')) + ] + + ['/' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n' + for f in sorted(self.largeFiles) if not self.hasLargeFileExtension(f) + ] + ) + + def addLargeFile(self, relPath): + LargeFileSystem.addLargeFile(self, relPath) + self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes()) + + def removeLargeFile(self, relPath): + LargeFileSystem.removeLargeFile(self, relPath) + self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes()) + + def processContent(self, git_mode, relPath, contents): + if relPath == '.gitattributes': + self.baseGitAttributes = contents + return (git_mode, self.generateGitAttributes()) + else: + return LargeFileSystem.processContent(self, git_mode, relPath, contents) + class Command: def __init__(self): self.usage = "usage: %prog [options]" @@ -1080,6 +1281,9 @@ class P4Submit(Command, P4UserMap): self.p4HasMoveCommand = p4_has_move_command() self.branch = None + if gitConfig('git-p4.largeFileSystem'): + die("Large file system not supported for git-p4 submit command. Please remove it from config.") + def check(self): if len(p4CmdList("opened ...")) > 0: die("You have files opened with perforce! Close them before starting the sync.") @@ -2030,6 +2234,13 @@ class P4Sync(Command, P4UserMap): self.clientSpecDirs = None self.tempBranches = [] self.tempBranchLocation = "git-p4-tmp" + self.largeFileSystem = None + + if gitConfig('git-p4.largeFileSystem'): + largeFileSystemConstructor = globals()[gitConfig('git-p4.largeFileSystem')] + self.largeFileSystem = largeFileSystemConstructor( + lambda git_mode, relPath, contents: self.writeToGitStream(git_mode, relPath, contents) + ) if gitConfig("git-p4.syncFromOrigin") == "false": self.syncWithOrigin = False @@ -2150,13 +2361,22 @@ class P4Sync(Command, P4UserMap): return branches + def writeToGitStream(self, gitMode, relPath, contents): + self.gitStream.write('M %s inline %s\n' % (gitMode, relPath)) + self.gitStream.write('data %d\n' % sum(len(d) for d in contents)) + for d in contents: + self.gitStream.write(d) + self.gitStream.write('\n') + # output one file from the P4 stream # - helper for streamP4Files def streamOneP4File(self, file, contents): relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes) if verbose: - sys.stderr.write("%s\n" % relPath) + size = int(self.stream_file['fileSize']) + sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024)) + sys.stdout.flush() (type_base, type_mods) = split_p4_type(file["type"]) @@ -2235,24 +2455,21 @@ class P4Sync(Command, P4UserMap): if self.verbose: print 'Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, relPath) - self.gitStream.write("M %s inline %s\n" % (git_mode, relPath)) + if self.largeFileSystem: + (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents) - # total length... - length = 0 - for d in contents: - length = length + len(d) - - self.gitStream.write("data %d\n" % length) - for d in contents: - self.gitStream.write(d) - self.gitStream.write("\n") + self.writeToGitStream(git_mode, relPath, contents) def streamOneP4Deletion(self, file): relPath = self.stripRepoPath(file['path'], self.branchPrefixes) if verbose: - sys.stderr.write("delete %s\n" % relPath) + sys.stdout.write("delete %s\n" % relPath) + sys.stdout.flush() self.gitStream.write("D %s\n" % relPath) + if self.largeFileSystem and self.largeFileSystem.isLargeFile(relPath): + self.largeFileSystem.removeLargeFile(relPath) + # handle another chunk of streaming data def streamP4FilesCb(self, marshalled): @@ -2262,6 +2479,14 @@ class P4Sync(Command, P4UserMap): if marshalled["code"] == "error": if "data" in marshalled: err = marshalled["data"].rstrip() + + if not err and 'fileSize' in self.stream_file: + required_bytes = int((4 * int(self.stream_file["fileSize"])) - calcDiskFree()) + if required_bytes > 0: + err = 'Not enough space left on %s! Free at least %i MB.' % ( + os.getcwd(), required_bytes/1024/1024 + ) + if err: f = None if self.stream_have_file_info: @@ -2290,10 +2515,23 @@ class P4Sync(Command, P4UserMap): # 'data' field we need to append to our array for k in marshalled.keys(): if k == 'data': + if 'streamContentSize' not in self.stream_file: + self.stream_file['streamContentSize'] = 0 + self.stream_file['streamContentSize'] += len(marshalled['data']) self.stream_contents.append(marshalled['data']) else: self.stream_file[k] = marshalled[k] + if (verbose and + 'streamContentSize' in self.stream_file and + 'fileSize' in self.stream_file and + 'depotFile' in self.stream_file): + size = int(self.stream_file["fileSize"]) + if size > 0: + progress = 100*self.stream_file['streamContentSize']/size + sys.stdout.write('\r%s %d%% (%i MB)' % (self.stream_file['depotFile'], progress, int(size/1024/1024))) + sys.stdout.flush() + self.stream_have_file_info = True # Stream directly from "p4 files" into "git fast-import" |