From dd136858f1ea40ad3c94191d647487fa4f31926c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 18 Oct 2024 20:33:49 +0200 Subject: Adding upstream version 9.0.0. Signed-off-by: Daniel Baumann --- services/gitdiff/csv.go | 469 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 469 insertions(+) create mode 100644 services/gitdiff/csv.go (limited to 'services/gitdiff/csv.go') diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go new file mode 100644 index 0000000..8db73c5 --- /dev/null +++ b/services/gitdiff/csv.go @@ -0,0 +1,469 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package gitdiff + +import ( + "encoding/csv" + "errors" + "io" +) + +const ( + unmappedColumn = -1 + maxRowsToInspect int = 10 + minRatioToMatch float32 = 0.8 +) + +// TableDiffCellType represents the type of a TableDiffCell. +type TableDiffCellType uint8 + +// TableDiffCellType possible values. +const ( + TableDiffCellUnchanged TableDiffCellType = iota + 1 + TableDiffCellChanged + TableDiffCellAdd + TableDiffCellDel + TableDiffCellMovedUnchanged + TableDiffCellMovedChanged +) + +// TableDiffCell represents a cell of a TableDiffRow +type TableDiffCell struct { + LeftCell string + RightCell string + Type TableDiffCellType +} + +// TableDiffRow represents a row of a TableDiffSection. +type TableDiffRow struct { + RowIdx int + Cells []*TableDiffCell +} + +// TableDiffSection represents a section of a DiffFile. +type TableDiffSection struct { + Rows []*TableDiffRow +} + +// csvReader wraps a csv.Reader which buffers the first rows. +type csvReader struct { + reader *csv.Reader + buffer [][]string + line int + eof bool +} + +// ErrorUndefinedCell is for when a row, column coordinates do not exist in the CSV +var ErrorUndefinedCell = errors.New("undefined cell") + +// createCsvReader creates a csvReader and fills the buffer +func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) { + csv := &csvReader{reader: reader} + csv.buffer = make([][]string, bufferRowCount) + for i := 0; i < bufferRowCount && !csv.eof; i++ { + row, err := csv.readNextRow() + if err != nil { + return nil, err + } + csv.buffer[i] = row + } + csv.line = bufferRowCount + return csv, nil +} + +// GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned. +func (csv *csvReader) GetRow(row int) ([]string, error) { + if row < len(csv.buffer) && row >= 0 { + return csv.buffer[row], nil + } + if csv.eof { + return nil, nil + } + for { + fields, err := csv.readNextRow() + if err != nil { + return nil, err + } + if csv.eof { + return nil, nil + } + csv.line++ + if csv.line-1 == row { + return fields, nil + } + } +} + +func (csv *csvReader) readNextRow() ([]string, error) { + if csv.eof { + return nil, nil + } + row, err := csv.reader.Read() + if err != nil { + if err != io.EOF { + return nil, err + } + csv.eof = true + } + return row, nil +} + +// CreateCsvDiff creates a tabular diff based on two CSV readers. +func CreateCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) { + if baseReader != nil && headReader != nil { + return createCsvDiff(diffFile, baseReader, headReader) + } + + if baseReader != nil { + return createCsvDiffSingle(baseReader, TableDiffCellDel) + } + return createCsvDiffSingle(headReader, TableDiffCellAdd) +} + +// createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted. +func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) { + var rows []*TableDiffRow + i := 1 + for { + row, err := reader.Read() + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + cells := make([]*TableDiffCell, len(row)) + for j := 0; j < len(row); j++ { + if celltype == TableDiffCellDel { + cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype} + } else { + cells[j] = &TableDiffCell{RightCell: row[j], Type: celltype} + } + } + rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells}) + i++ + } + + return []*TableDiffSection{{Rows: rows}}, nil +} + +func createCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) { + // Given the baseReader and headReader, we are going to create CSV Reader for each, baseCSVReader and b respectively + baseCSVReader, err := createCsvReader(baseReader, maxRowsToInspect) + if err != nil { + return nil, err + } + headCSVReader, err := createCsvReader(headReader, maxRowsToInspect) + if err != nil { + return nil, err + } + + // Initializing the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns + a2bColMap, b2aColMap := getColumnMapping(baseCSVReader, headCSVReader) + + // Determines how many cols there will be in the diff table, which includes deleted columns from base and added columns to base + numDiffTableCols := len(a2bColMap) + countUnmappedColumns(b2aColMap) + if len(a2bColMap) < len(b2aColMap) { + numDiffTableCols = len(b2aColMap) + countUnmappedColumns(a2bColMap) + } + + // createDiffTableRow takes the row # of the `a` line and `b` line of a diff (starting from 1), 0 if the line doesn't exist (undefined) + // in the base or head respectively. + // Returns a TableDiffRow which has the row index + createDiffTableRow := func(aLineNum, bLineNum int) (*TableDiffRow, error) { + // diffTableCells is a row of the diff table. It will have a cells for added, deleted, changed, and unchanged content, thus either + // the same size as the head table or bigger + diffTableCells := make([]*TableDiffCell, numDiffTableCols) + var bRow *[]string + if bLineNum > 0 { + row, err := headCSVReader.GetRow(bLineNum - 1) + if err != nil { + return nil, err + } + bRow = &row + } + var aRow *[]string + if aLineNum > 0 { + row, err := baseCSVReader.GetRow(aLineNum - 1) + if err != nil { + return nil, err + } + aRow = &row + } + if aRow == nil && bRow == nil { + // No content + return nil, nil + } + + aIndex := 0 // tracks where we are in the a2bColMap + bIndex := 0 // tracks where we are in the b2aColMap + colsAdded := 0 // incremented whenever we found a column was added + colsDeleted := 0 // incrememted whenever a column was deleted + + // We loop until both the aIndex and bIndex are greater than their col map, which then we are done + for aIndex < len(a2bColMap) || bIndex < len(b2aColMap) { + // Starting from where aIndex is currently pointing, we see if the map is -1 (dleeted) and if is, create column to note that, increment, and look at the next aIndex + for aIndex < len(a2bColMap) && a2bColMap[aIndex] == -1 && (bIndex >= len(b2aColMap) || aIndex <= bIndex) { + var aCell string + if aRow != nil { + if cell, err := getCell(*aRow, aIndex); err != nil { + if err != ErrorUndefinedCell { + return nil, err + } + } else { + aCell = cell + } + } + diffTableCells[bIndex+colsDeleted] = &TableDiffCell{LeftCell: aCell, Type: TableDiffCellDel} + aIndex++ + colsDeleted++ + } + + // aIndex is now pointing to a column that also exists in b, or is at the end of a2bColMap. If the former, + // we can just increment aIndex until it points to a -1 column or one greater than the current bIndex + for aIndex < len(a2bColMap) && a2bColMap[aIndex] != -1 { + aIndex++ + } + + // Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if is, create column to note that, increment, and look at the next aIndex + for bIndex < len(b2aColMap) && b2aColMap[bIndex] == -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) { + var bCell string + cellType := TableDiffCellAdd + if bRow != nil { + if cell, err := getCell(*bRow, bIndex); err != nil { + if err != ErrorUndefinedCell { + return nil, err + } + } else { + bCell = cell + } + } else { + cellType = TableDiffCellDel + } + diffTableCells[bIndex+colsDeleted] = &TableDiffCell{RightCell: bCell, Type: cellType} + bIndex++ + colsAdded++ + } + + // aIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former, + // we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table + for bIndex < len(b2aColMap) && b2aColMap[bIndex] != -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) { + var diffTableCell TableDiffCell + + var aCell *string + // get the aCell value if the aRow exists + if aRow != nil { + if cell, err := getCell(*aRow, b2aColMap[bIndex]); err != nil { + if err != ErrorUndefinedCell { + return nil, err + } + } else { + aCell = &cell + diffTableCell.LeftCell = cell + } + } else { + diffTableCell.Type = TableDiffCellAdd + } + + var bCell *string + // get the bCell value if the bRow exists + if bRow != nil { + if cell, err := getCell(*bRow, bIndex); err != nil { + if err != ErrorUndefinedCell { + return nil, err + } + } else { + bCell = &cell + diffTableCell.RightCell = cell + } + } else { + diffTableCell.Type = TableDiffCellDel + } + + // if both a and b have a row that exists, compare the value and determine if the row has moved + if aCell != nil && bCell != nil { + moved := ((bIndex + colsDeleted) != (b2aColMap[bIndex] + colsAdded)) + if *aCell != *bCell { + if moved { + diffTableCell.Type = TableDiffCellMovedChanged + } else { + diffTableCell.Type = TableDiffCellChanged + } + } else { + if moved { + diffTableCell.Type = TableDiffCellMovedUnchanged + } else { + diffTableCell.Type = TableDiffCellUnchanged + } + diffTableCell.LeftCell = "" + } + } + + // Add the diff column to the diff row + diffTableCells[bIndex+colsDeleted] = &diffTableCell + bIndex++ + } + } + + return &TableDiffRow{RowIdx: bLineNum, Cells: diffTableCells}, nil + } + + // diffTableSections are TableDiffSections which represent the diffTableSections we get when doing a diff, each will be its own table in the view + var diffTableSections []*TableDiffSection + + for i, section := range diffFile.Sections { + // Each section has multiple diffTableRows + var diffTableRows []*TableDiffRow + lines := tryMergeLines(section.Lines) + // Loop through the merged lines to get each row of the CSV diff table for this section + for j, line := range lines { + if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) { + diffTableRow, err := createDiffTableRow(1, 1) + if err != nil { + return nil, err + } + if diffTableRow != nil { + diffTableRows = append(diffTableRows, diffTableRow) + } + } + diffTableRow, err := createDiffTableRow(line[0], line[1]) + if err != nil { + return nil, err + } + if diffTableRow != nil { + diffTableRows = append(diffTableRows, diffTableRow) + } + } + + if len(diffTableRows) > 0 { + diffTableSections = append(diffTableSections, &TableDiffSection{Rows: diffTableRows}) + } + } + + return diffTableSections, nil +} + +// getColumnMapping creates a mapping of columns between a and b +func getColumnMapping(baseCSVReader, headCSVReader *csvReader) ([]int, []int) { + baseRow, _ := baseCSVReader.GetRow(0) + headRow, _ := headCSVReader.GetRow(0) + + base2HeadColMap := []int{} + head2BaseColMap := []int{} + + if baseRow != nil { + base2HeadColMap = make([]int, len(baseRow)) + } + if headRow != nil { + head2BaseColMap = make([]int, len(headRow)) + } + + // Initializes all head2base mappings to be unmappedColumn (-1) + for i := 0; i < len(head2BaseColMap); i++ { + head2BaseColMap[i] = unmappedColumn + } + + // Loops through the baseRow and see if there is a match in the head row + for i := 0; i < len(baseRow); i++ { + base2HeadColMap[i] = unmappedColumn + baseCell, err := getCell(baseRow, i) + if err == nil { + for j := 0; j < len(headRow); j++ { + if head2BaseColMap[j] == -1 { + headCell, err := getCell(headRow, j) + if err == nil && baseCell == headCell { + base2HeadColMap[i] = j + head2BaseColMap[j] = i + break + } + } + } + } + } + + tryMapColumnsByContent(baseCSVReader, base2HeadColMap, headCSVReader, head2BaseColMap) + tryMapColumnsByContent(headCSVReader, head2BaseColMap, baseCSVReader, base2HeadColMap) + + return base2HeadColMap, head2BaseColMap +} + +// tryMapColumnsByContent tries to map missing columns by the content of the first lines. +func tryMapColumnsByContent(baseCSVReader *csvReader, base2HeadColMap []int, headCSVReader *csvReader, head2BaseColMap []int) { + for i := 0; i < len(base2HeadColMap); i++ { + headStart := 0 + for base2HeadColMap[i] == unmappedColumn && headStart < len(head2BaseColMap) { + if head2BaseColMap[headStart] == unmappedColumn { + rows := min(maxRowsToInspect, max(0, min(len(baseCSVReader.buffer), len(headCSVReader.buffer))-1)) + same := 0 + for j := 1; j <= rows; j++ { + baseCell, baseErr := getCell(baseCSVReader.buffer[j], i) + headCell, headErr := getCell(headCSVReader.buffer[j], headStart) + if baseErr == nil && headErr == nil && baseCell == headCell { + same++ + } + } + if (float32(same) / float32(rows)) > minRatioToMatch { + base2HeadColMap[i] = headStart + head2BaseColMap[headStart] = i + } + } + headStart++ + } + } +} + +// getCell returns the specific cell or nil if not present. +func getCell(row []string, column int) (string, error) { + if column < len(row) { + return row[column], nil + } + return "", ErrorUndefinedCell +} + +// countUnmappedColumns returns the count of unmapped columns. +func countUnmappedColumns(mapping []int) int { + count := 0 + for i := 0; i < len(mapping); i++ { + if mapping[i] == unmappedColumn { + count++ + } + } + return count +} + +// tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered. +func tryMergeLines(lines []*DiffLine) [][2]int { + ids := make([][2]int, len(lines)) + + i := 0 + for _, line := range lines { + if line.Type != DiffLineSection { + ids[i][0] = line.LeftIdx + ids[i][1] = line.RightIdx + i++ + } + } + + ids = ids[:i] + + result := make([][2]int, len(ids)) + + j := 0 + for i = 0; i < len(ids); i++ { + if ids[i][0] == 0 { + if j > 0 && result[j-1][1] == 0 { + temp := j + for temp > 0 && result[temp-1][1] == 0 { + temp-- + } + result[temp][1] = ids[i][1] + continue + } + } + result[j] = ids[i] + j++ + } + + return result[:j] +} -- cgit v1.2.3