From e68b9d00a6e05b3a941f63ffb696f91e554ac5ec Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 18 Oct 2024 20:33:49 +0200 Subject: Adding upstream version 9.0.3. Signed-off-by: Daniel Baumann --- services/gitdiff/csv.go | 469 +++++++ services/gitdiff/csv_test.go | 229 ++++ services/gitdiff/gitdiff.go | 1396 ++++++++++++++++++++ services/gitdiff/gitdiff_test.go | 671 ++++++++++ services/gitdiff/highlightdiff.go | 227 ++++ services/gitdiff/highlightdiff_test.go | 125 ++ services/gitdiff/main_test.go | 18 + services/gitdiff/testdata/academic-module/HEAD | 1 + services/gitdiff/testdata/academic-module/config | 10 + .../gitdiff/testdata/academic-module/description | 1 + services/gitdiff/testdata/academic-module/index | Bin 0 -> 46960 bytes .../gitdiff/testdata/academic-module/info/exclude | 6 + .../gitdiff/testdata/academic-module/logs/HEAD | 1 + .../academic-module/logs/refs/heads/master | 1 + .../academic-module/logs/refs/remotes/origin/HEAD | 1 + ...ck-597efbc3613c7ba790e33b178fd9fc1fe17b4245.idx | Bin 0 -> 65332 bytes ...k-597efbc3613c7ba790e33b178fd9fc1fe17b4245.pack | Bin 0 -> 1167905 bytes .../gitdiff/testdata/academic-module/packed-refs | 2 + .../testdata/academic-module/refs/heads/master | 1 + .../academic-module/refs/remotes/origin/HEAD | 1 + 20 files changed, 3160 insertions(+) create mode 100644 services/gitdiff/csv.go create mode 100644 services/gitdiff/csv_test.go create mode 100644 services/gitdiff/gitdiff.go create mode 100644 services/gitdiff/gitdiff_test.go create mode 100644 services/gitdiff/highlightdiff.go create mode 100644 services/gitdiff/highlightdiff_test.go create mode 100644 services/gitdiff/main_test.go create mode 100644 services/gitdiff/testdata/academic-module/HEAD create mode 100644 services/gitdiff/testdata/academic-module/config create mode 100644 services/gitdiff/testdata/academic-module/description create mode 100644 services/gitdiff/testdata/academic-module/index create mode 100644 
services/gitdiff/testdata/academic-module/info/exclude create mode 100644 services/gitdiff/testdata/academic-module/logs/HEAD create mode 100644 services/gitdiff/testdata/academic-module/logs/refs/heads/master create mode 100644 services/gitdiff/testdata/academic-module/logs/refs/remotes/origin/HEAD create mode 100644 services/gitdiff/testdata/academic-module/objects/pack/pack-597efbc3613c7ba790e33b178fd9fc1fe17b4245.idx create mode 100644 services/gitdiff/testdata/academic-module/objects/pack/pack-597efbc3613c7ba790e33b178fd9fc1fe17b4245.pack create mode 100644 services/gitdiff/testdata/academic-module/packed-refs create mode 100644 services/gitdiff/testdata/academic-module/refs/heads/master create mode 100644 services/gitdiff/testdata/academic-module/refs/remotes/origin/HEAD (limited to 'services/gitdiff') diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go new file mode 100644 index 0000000..8db73c5 --- /dev/null +++ b/services/gitdiff/csv.go @@ -0,0 +1,469 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package gitdiff + +import ( + "encoding/csv" + "errors" + "io" +) + +const ( + unmappedColumn = -1 + maxRowsToInspect int = 10 + minRatioToMatch float32 = 0.8 +) + +// TableDiffCellType represents the type of a TableDiffCell. +type TableDiffCellType uint8 + +// TableDiffCellType possible values. +const ( + TableDiffCellUnchanged TableDiffCellType = iota + 1 + TableDiffCellChanged + TableDiffCellAdd + TableDiffCellDel + TableDiffCellMovedUnchanged + TableDiffCellMovedChanged +) + +// TableDiffCell represents a cell of a TableDiffRow +type TableDiffCell struct { + LeftCell string + RightCell string + Type TableDiffCellType +} + +// TableDiffRow represents a row of a TableDiffSection. +type TableDiffRow struct { + RowIdx int + Cells []*TableDiffCell +} + +// TableDiffSection represents a section of a DiffFile. 
+type TableDiffSection struct { + Rows []*TableDiffRow +} + +// csvReader wraps a csv.Reader which buffers the first rows. +type csvReader struct { + reader *csv.Reader + buffer [][]string + line int + eof bool +} + +// ErrorUndefinedCell is for when a row, column coordinates do not exist in the CSV +var ErrorUndefinedCell = errors.New("undefined cell") + +// createCsvReader creates a csvReader and fills the buffer +func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) { + csv := &csvReader{reader: reader} + csv.buffer = make([][]string, bufferRowCount) + for i := 0; i < bufferRowCount && !csv.eof; i++ { + row, err := csv.readNextRow() + if err != nil { + return nil, err + } + csv.buffer[i] = row + } + csv.line = bufferRowCount + return csv, nil +} + +// GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned. +func (csv *csvReader) GetRow(row int) ([]string, error) { + if row < len(csv.buffer) && row >= 0 { + return csv.buffer[row], nil + } + if csv.eof { + return nil, nil + } + for { + fields, err := csv.readNextRow() + if err != nil { + return nil, err + } + if csv.eof { + return nil, nil + } + csv.line++ + if csv.line-1 == row { + return fields, nil + } + } +} + +func (csv *csvReader) readNextRow() ([]string, error) { + if csv.eof { + return nil, nil + } + row, err := csv.reader.Read() + if err != nil { + if err != io.EOF { + return nil, err + } + csv.eof = true + } + return row, nil +} + +// CreateCsvDiff creates a tabular diff based on two CSV readers. 
+func CreateCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) { + if baseReader != nil && headReader != nil { + return createCsvDiff(diffFile, baseReader, headReader) + } + + if baseReader != nil { + return createCsvDiffSingle(baseReader, TableDiffCellDel) + } + return createCsvDiffSingle(headReader, TableDiffCellAdd) +} + +// createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted. +func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) { + var rows []*TableDiffRow + i := 1 + for { + row, err := reader.Read() + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + cells := make([]*TableDiffCell, len(row)) + for j := 0; j < len(row); j++ { + if celltype == TableDiffCellDel { + cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype} + } else { + cells[j] = &TableDiffCell{RightCell: row[j], Type: celltype} + } + } + rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells}) + i++ + } + + return []*TableDiffSection{{Rows: rows}}, nil +} + +func createCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) { + // Given the baseReader and headReader, we are going to create CSV Reader for each, baseCSVReader and headCSVReader respectively + baseCSVReader, err := createCsvReader(baseReader, maxRowsToInspect) + if err != nil { + return nil, err + } + headCSVReader, err := createCsvReader(headReader, maxRowsToInspect) + if err != nil { + return nil, err + } + + // Initializing the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns + a2bColMap, b2aColMap := getColumnMapping(baseCSVReader, headCSVReader) + + // Determines how many cols there will be in the diff table, which includes deleted columns from base and added columns to base + numDiffTableCols := len(a2bColMap) + countUnmappedColumns(b2aColMap) + if len(a2bColMap) < len(b2aColMap) { + numDiffTableCols =
len(b2aColMap) + countUnmappedColumns(a2bColMap) + } + + // createDiffTableRow takes the row # of the `a` line and `b` line of a diff (starting from 1), 0 if the line doesn't exist (undefined) + // in the base or head respectively. + // Returns a TableDiffRow which has the row index + createDiffTableRow := func(aLineNum, bLineNum int) (*TableDiffRow, error) { + // diffTableCells is a row of the diff table. It will have cells for added, deleted, changed, and unchanged content, thus either + // the same size as the head table or bigger + diffTableCells := make([]*TableDiffCell, numDiffTableCols) + var bRow *[]string + if bLineNum > 0 { + row, err := headCSVReader.GetRow(bLineNum - 1) + if err != nil { + return nil, err + } + bRow = &row + } + var aRow *[]string + if aLineNum > 0 { + row, err := baseCSVReader.GetRow(aLineNum - 1) + if err != nil { + return nil, err + } + aRow = &row + } + if aRow == nil && bRow == nil { + // No content + return nil, nil + } + + aIndex := 0 // tracks where we are in the a2bColMap + bIndex := 0 // tracks where we are in the b2aColMap + colsAdded := 0 // incremented whenever we found a column was added + colsDeleted := 0 // incremented whenever a column was deleted + + // We loop until both the aIndex and bIndex are greater than their col map, which then we are done + for aIndex < len(a2bColMap) || bIndex < len(b2aColMap) { + // Starting from where aIndex is currently pointing, we see if the map is -1 (deleted) and if is, create column to note that, increment, and look at the next aIndex + for aIndex < len(a2bColMap) && a2bColMap[aIndex] == -1 && (bIndex >= len(b2aColMap) || aIndex <= bIndex) { + var aCell string + if aRow != nil { + if cell, err := getCell(*aRow, aIndex); err != nil { + if err != ErrorUndefinedCell { + return nil, err + } + } else { + aCell = cell + } + } + diffTableCells[bIndex+colsDeleted] = &TableDiffCell{LeftCell: aCell, Type: TableDiffCellDel} + aIndex++ + colsDeleted++ + } + + // aIndex is now pointing to a
column that also exists in b, or is at the end of a2bColMap. If the former, + // we can just increment aIndex until it points to a -1 column or one greater than the current bIndex + for aIndex < len(a2bColMap) && a2bColMap[aIndex] != -1 { + aIndex++ + } + + // Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if is, create column to note that, increment, and look at the next bIndex + for bIndex < len(b2aColMap) && b2aColMap[bIndex] == -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) { + var bCell string + cellType := TableDiffCellAdd + if bRow != nil { + if cell, err := getCell(*bRow, bIndex); err != nil { + if err != ErrorUndefinedCell { + return nil, err + } + } else { + bCell = cell + } + } else { + cellType = TableDiffCellDel + } + diffTableCells[bIndex+colsDeleted] = &TableDiffCell{RightCell: bCell, Type: cellType} + bIndex++ + colsAdded++ + } + + // bIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former, + // we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table + for bIndex < len(b2aColMap) && b2aColMap[bIndex] != -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) { + var diffTableCell TableDiffCell + + var aCell *string + // get the aCell value if the aRow exists + if aRow != nil { + if cell, err := getCell(*aRow, b2aColMap[bIndex]); err != nil { + if err != ErrorUndefinedCell { + return nil, err + } + } else { + aCell = &cell + diffTableCell.LeftCell = cell + } + } else { + diffTableCell.Type = TableDiffCellAdd + } + + var bCell *string + // get the bCell value if the bRow exists + if bRow != nil { + if cell, err := getCell(*bRow, bIndex); err != nil { + if err != ErrorUndefinedCell { + return nil, err + } + } else { + bCell = &cell + diffTableCell.RightCell = cell + } + } else { + diffTableCell.Type = TableDiffCellDel + } + + // if both a and b have a row that exists, compare the
value and determine if the row has moved + if aCell != nil && bCell != nil { + moved := ((bIndex + colsDeleted) != (b2aColMap[bIndex] + colsAdded)) + if *aCell != *bCell { + if moved { + diffTableCell.Type = TableDiffCellMovedChanged + } else { + diffTableCell.Type = TableDiffCellChanged + } + } else { + if moved { + diffTableCell.Type = TableDiffCellMovedUnchanged + } else { + diffTableCell.Type = TableDiffCellUnchanged + } + diffTableCell.LeftCell = "" + } + } + + // Add the diff column to the diff row + diffTableCells[bIndex+colsDeleted] = &diffTableCell + bIndex++ + } + } + + return &TableDiffRow{RowIdx: bLineNum, Cells: diffTableCells}, nil + } + + // diffTableSections are TableDiffSections which represent the diffTableSections we get when doing a diff, each will be its own table in the view + var diffTableSections []*TableDiffSection + + for i, section := range diffFile.Sections { + // Each section has multiple diffTableRows + var diffTableRows []*TableDiffRow + lines := tryMergeLines(section.Lines) + // Loop through the merged lines to get each row of the CSV diff table for this section + for j, line := range lines { + if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) { + diffTableRow, err := createDiffTableRow(1, 1) + if err != nil { + return nil, err + } + if diffTableRow != nil { + diffTableRows = append(diffTableRows, diffTableRow) + } + } + diffTableRow, err := createDiffTableRow(line[0], line[1]) + if err != nil { + return nil, err + } + if diffTableRow != nil { + diffTableRows = append(diffTableRows, diffTableRow) + } + } + + if len(diffTableRows) > 0 { + diffTableSections = append(diffTableSections, &TableDiffSection{Rows: diffTableRows}) + } + } + + return diffTableSections, nil +} + +// getColumnMapping creates a mapping of columns between a and b +func getColumnMapping(baseCSVReader, headCSVReader *csvReader) ([]int, []int) { + baseRow, _ := baseCSVReader.GetRow(0) + headRow, _ := headCSVReader.GetRow(0) + + base2HeadColMap := []int{} + 
head2BaseColMap := []int{} + + if baseRow != nil { + base2HeadColMap = make([]int, len(baseRow)) + } + if headRow != nil { + head2BaseColMap = make([]int, len(headRow)) + } + + // Initializes all head2base mappings to be unmappedColumn (-1) + for i := 0; i < len(head2BaseColMap); i++ { + head2BaseColMap[i] = unmappedColumn + } + + // Loops through the baseRow and see if there is a match in the head row + for i := 0; i < len(baseRow); i++ { + base2HeadColMap[i] = unmappedColumn + baseCell, err := getCell(baseRow, i) + if err == nil { + for j := 0; j < len(headRow); j++ { + if head2BaseColMap[j] == -1 { + headCell, err := getCell(headRow, j) + if err == nil && baseCell == headCell { + base2HeadColMap[i] = j + head2BaseColMap[j] = i + break + } + } + } + } + } + + tryMapColumnsByContent(baseCSVReader, base2HeadColMap, headCSVReader, head2BaseColMap) + tryMapColumnsByContent(headCSVReader, head2BaseColMap, baseCSVReader, base2HeadColMap) + + return base2HeadColMap, head2BaseColMap +} + +// tryMapColumnsByContent tries to map missing columns by the content of the first lines. +func tryMapColumnsByContent(baseCSVReader *csvReader, base2HeadColMap []int, headCSVReader *csvReader, head2BaseColMap []int) { + for i := 0; i < len(base2HeadColMap); i++ { + headStart := 0 + for base2HeadColMap[i] == unmappedColumn && headStart < len(head2BaseColMap) { + if head2BaseColMap[headStart] == unmappedColumn { + rows := min(maxRowsToInspect, max(0, min(len(baseCSVReader.buffer), len(headCSVReader.buffer))-1)) + same := 0 + for j := 1; j <= rows; j++ { + baseCell, baseErr := getCell(baseCSVReader.buffer[j], i) + headCell, headErr := getCell(headCSVReader.buffer[j], headStart) + if baseErr == nil && headErr == nil && baseCell == headCell { + same++ + } + } + if (float32(same) / float32(rows)) > minRatioToMatch { + base2HeadColMap[i] = headStart + head2BaseColMap[headStart] = i + } + } + headStart++ + } + } +} + +// getCell returns the specific cell or ErrorUndefinedCell if not present.
+func getCell(row []string, column int) (string, error) { + if column < len(row) { + return row[column], nil + } + return "", ErrorUndefinedCell +} + +// countUnmappedColumns returns the count of unmapped columns. +func countUnmappedColumns(mapping []int) int { + count := 0 + for i := 0; i < len(mapping); i++ { + if mapping[i] == unmappedColumn { + count++ + } + } + return count +} + +// tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered. +func tryMergeLines(lines []*DiffLine) [][2]int { + ids := make([][2]int, len(lines)) + + i := 0 + for _, line := range lines { + if line.Type != DiffLineSection { + ids[i][0] = line.LeftIdx + ids[i][1] = line.RightIdx + i++ + } + } + + ids = ids[:i] + + result := make([][2]int, len(ids)) + + j := 0 + for i = 0; i < len(ids); i++ { + if ids[i][0] == 0 { + if j > 0 && result[j-1][1] == 0 { + temp := j + for temp > 0 && result[temp-1][1] == 0 { + temp-- + } + result[temp][1] = ids[i][1] + continue + } + } + result[j] = ids[i] + j++ + } + + return result[:j] +} diff --git a/services/gitdiff/csv_test.go b/services/gitdiff/csv_test.go new file mode 100644 index 0000000..1dbe616 --- /dev/null +++ b/services/gitdiff/csv_test.go @@ -0,0 +1,229 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package gitdiff + +import ( + "encoding/csv" + "strings" + "testing" + + "code.gitea.io/gitea/models/db" + csv_module "code.gitea.io/gitea/modules/csv" + "code.gitea.io/gitea/modules/setting" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCSVDiff(t *testing.T) { + cases := []struct { + diff string + base string + head string + cells [][]TableDiffCellType + }{ + // case 0 - initial commit of a csv + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -0,0 +1,2 @@ ++col1,col2 ++a,a`, + base: "", + head: `col1,col2 +a,a`, + cells: [][]TableDiffCellType{ + {TableDiffCellAdd, TableDiffCellAdd}, + {TableDiffCellAdd, TableDiffCellAdd}, + }, + }, + // case 1 - adding 1 row at end + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +1,3 @@ + col1,col2 +-a,a ++a,a ++b,b`, + base: `col1,col2 +a,a`, + head: `col1,col2 +a,a +b,b`, + cells: [][]TableDiffCellType{ + {TableDiffCellUnchanged, TableDiffCellUnchanged}, + {TableDiffCellUnchanged, TableDiffCellUnchanged}, + {TableDiffCellAdd, TableDiffCellAdd}, + }, + }, + // case 2 - row deleted + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,3 +1,2 @@ + col1,col2 +-a,a + b,b`, + base: `col1,col2 +a,a +b,b`, + head: `col1,col2 +b,b`, + cells: [][]TableDiffCellType{ + {TableDiffCellUnchanged, TableDiffCellUnchanged}, + {TableDiffCellDel, TableDiffCellDel}, + {TableDiffCellUnchanged, TableDiffCellUnchanged}, + }, + }, + // case 3 - row changed + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +1,2 @@ + col1,col2 +-b,b ++b,c`, + base: `col1,col2 +b,b`, + head: `col1,col2 +b,c`, + cells: [][]TableDiffCellType{ + {TableDiffCellUnchanged, TableDiffCellUnchanged}, + {TableDiffCellUnchanged, TableDiffCellChanged}, + }, + }, + // case 4 - all deleted + { + diff: 
`diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +0,0 @@ +-col1,col2 +-b,c`, + base: `col1,col2 +b,c`, + head: "", + cells: [][]TableDiffCellType{ + {TableDiffCellDel, TableDiffCellDel}, + {TableDiffCellDel, TableDiffCellDel}, + }, + }, + // case 5 - renames first column + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,3 +1,3 @@ +-col1,col2,col3 ++cola,col2,col3 + a,b,c`, + base: `col1,col2,col3 +a,b,c`, + head: `cola,col2,col3 +a,b,c`, + cells: [][]TableDiffCellType{ + {TableDiffCellDel, TableDiffCellAdd, TableDiffCellUnchanged, TableDiffCellUnchanged}, + {TableDiffCellDel, TableDiffCellAdd, TableDiffCellUnchanged, TableDiffCellUnchanged}, + }, + }, + // case 6 - inserts a column after first, deletes last column + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +1,2 @@ +-col1,col2,col3 +-a,b,c ++col1,col1a,col2 ++a,d,b`, + base: `col1,col2,col3 +a,b,c`, + head: `col1,col1a,col2 +a,d,b`, + cells: [][]TableDiffCellType{ + {TableDiffCellUnchanged, TableDiffCellAdd, TableDiffCellDel, TableDiffCellMovedUnchanged}, + {TableDiffCellUnchanged, TableDiffCellAdd, TableDiffCellDel, TableDiffCellMovedUnchanged}, + }, + }, + // case 7 - deletes first column, inserts column after last + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +1,2 @@ +-col1,col2,col3 +-a,b,c ++col2,col3,col4 ++b,c,d`, + base: `col1,col2,col3 +a,b,c`, + head: `col2,col3,col4 +b,c,d`, + cells: [][]TableDiffCellType{ + {TableDiffCellDel, TableDiffCellUnchanged, TableDiffCellUnchanged, TableDiffCellAdd}, + {TableDiffCellDel, TableDiffCellUnchanged, TableDiffCellUnchanged, TableDiffCellAdd}, + }, + }, + // case 8 - two columns deleted, 2 added + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +1,2 @@ +-col1,col2,col +-a,b,c ++col3,col4,col5 ++c,d,e`, + base: 
`col1,col2,col3 +a,b,c`, + head: `col3,col4,col5 +c,d,e`, + cells: [][]TableDiffCellType{ + {TableDiffCellDel, TableDiffCellMovedUnchanged, TableDiffCellDel, TableDiffCellAdd, TableDiffCellAdd}, + {TableDiffCellDel, TableDiffCellMovedUnchanged, TableDiffCellDel, TableDiffCellAdd, TableDiffCellAdd}, + }, + }, + } + + for n, c := range cases { + diff, err := ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.diff), "") + if err != nil { + t.Errorf("ParsePatch failed: %s", err) + } + + var baseReader *csv.Reader + if len(c.base) > 0 { + baseReader, err = csv_module.CreateReaderAndDetermineDelimiter(nil, strings.NewReader(c.base)) + if err != nil { + t.Errorf("CreateReaderAndDetermineDelimiter failed: %s", err) + } + } + var headReader *csv.Reader + if len(c.head) > 0 { + headReader, err = csv_module.CreateReaderAndDetermineDelimiter(nil, strings.NewReader(c.head)) + if err != nil { + t.Errorf("CreateReaderAndDetermineDelimiter failed: %s", err) + } + } + + result, err := CreateCsvDiff(diff.Files[0], baseReader, headReader) + require.NoError(t, err) + assert.Len(t, result, 1, "case %d: should be one section", n) + + section := result[0] + assert.Len(t, section.Rows, len(c.cells), "case %d: should be %d rows", n, len(c.cells)) + + for i, row := range section.Rows { + assert.Len(t, row.Cells, len(c.cells[i]), "case %d: row %d should have %d cells", n, i, len(c.cells[i])) + for j, cell := range row.Cells { + assert.Equal(t, c.cells[i][j], cell.Type, "case %d: row %d cell %d should be equal", n, i, j) + } + } + } +} diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go new file mode 100644 index 0000000..8f376a1 --- /dev/null +++ b/services/gitdiff/gitdiff.go @@ -0,0 +1,1396 @@ +// Copyright 2014 The Gogs Authors. All rights reserved. +// Copyright 2019 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package gitdiff + +import ( + "bufio" + "bytes" + "cmp" + "context" + "fmt" + "html" + "html/template" + "io" + "net/url" + "strings" + "time" + + "code.gitea.io/gitea/models/db" + git_model "code.gitea.io/gitea/models/git" + issues_model "code.gitea.io/gitea/models/issues" + pull_model "code.gitea.io/gitea/models/pull" + user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/analyze" + "code.gitea.io/gitea/modules/charset" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/highlight" + "code.gitea.io/gitea/modules/lfs" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/translation" + + "github.com/sergi/go-diff/diffmatchpatch" + stdcharset "golang.org/x/net/html/charset" + "golang.org/x/text/encoding" + "golang.org/x/text/transform" +) + +// DiffLineType represents the type of DiffLine. +type DiffLineType uint8 + +// DiffLineType possible values. +const ( + DiffLinePlain DiffLineType = iota + 1 + DiffLineAdd + DiffLineDel + DiffLineSection +) + +// DiffFileType represents the type of DiffFile. +type DiffFileType uint8 + +// DiffFileType possible values. +const ( + DiffFileAdd DiffFileType = iota + 1 + DiffFileChange + DiffFileDel + DiffFileRename + DiffFileCopy +) + +// DiffLineExpandDirection represents the DiffLineSection expand direction +type DiffLineExpandDirection uint8 + +// DiffLineExpandDirection possible values. +const ( + DiffLineExpandNone DiffLineExpandDirection = iota + 1 + DiffLineExpandSingle + DiffLineExpandUpDown + DiffLineExpandUp + DiffLineExpandDown +) + +// DiffLine represents a line difference in a DiffSection. 
+type DiffLine struct { + LeftIdx int + RightIdx int + Match int + Type DiffLineType + Content string + Conversations []issues_model.CodeConversation + SectionInfo *DiffLineSectionInfo +} + +// DiffLineSectionInfo represents diff line section meta data +type DiffLineSectionInfo struct { + Path string + LastLeftIdx int + LastRightIdx int + LeftIdx int + RightIdx int + LeftHunkSize int + RightHunkSize int +} + +// BlobExcerptChunkSize represent max lines of excerpt +const BlobExcerptChunkSize = 20 + +// GetType returns the type of DiffLine. +func (d *DiffLine) GetType() int { + return int(d.Type) +} + +// GetHTMLDiffLineType returns the diff line type name for HTML +func (d *DiffLine) GetHTMLDiffLineType() string { + switch d.Type { + case DiffLineAdd: + return "add" + case DiffLineDel: + return "del" + case DiffLineSection: + return "tag" + } + return "same" +} + +// CanComment returns whether a line can get commented +func (d *DiffLine) CanComment() bool { + return len(d.Conversations) == 0 && d.Type != DiffLineSection +} + +// GetCommentSide returns the comment side of the first comment, if not set returns empty string +func (d *DiffLine) GetCommentSide() string { + if len(d.Conversations) == 0 || len(d.Conversations[0]) == 0 { + return "" + } + return d.Conversations[0][0].DiffSide() +} + +// GetLineTypeMarker returns the line type marker +func (d *DiffLine) GetLineTypeMarker() string { + if strings.IndexByte(" +-", d.Content[0]) > -1 { + return d.Content[0:1] + } + return "" +} + +// GetBlobExcerptQuery builds query string to get blob excerpt +func (d *DiffLine) GetBlobExcerptQuery() string { + query := fmt.Sprintf( + "last_left=%d&last_right=%d&"+ + "left=%d&right=%d&"+ + "left_hunk_size=%d&right_hunk_size=%d&"+ + "path=%s", + d.SectionInfo.LastLeftIdx, d.SectionInfo.LastRightIdx, + d.SectionInfo.LeftIdx, d.SectionInfo.RightIdx, + d.SectionInfo.LeftHunkSize, d.SectionInfo.RightHunkSize, + url.QueryEscape(d.SectionInfo.Path)) + return query +} + +// 
GetExpandDirection gets DiffLineExpandDirection +func (d *DiffLine) GetExpandDirection() DiffLineExpandDirection { + if d.Type != DiffLineSection || d.SectionInfo == nil || d.SectionInfo.LeftIdx-d.SectionInfo.LastLeftIdx <= 1 || d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx <= 1 { + return DiffLineExpandNone + } + if d.SectionInfo.LastLeftIdx <= 0 && d.SectionInfo.LastRightIdx <= 0 { + return DiffLineExpandUp + } else if d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx > BlobExcerptChunkSize && d.SectionInfo.RightHunkSize > 0 { + return DiffLineExpandUpDown + } else if d.SectionInfo.LeftHunkSize <= 0 && d.SectionInfo.RightHunkSize <= 0 { + return DiffLineExpandDown + } + return DiffLineExpandSingle +} + +func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int) *DiffLineSectionInfo { + leftLine, leftHunk, rightLine, righHunk := git.ParseDiffHunkString(line) + + return &DiffLineSectionInfo{ + Path: treePath, + LastLeftIdx: lastLeftIdx, + LastRightIdx: lastRightIdx, + LeftIdx: leftLine, + RightIdx: rightLine, + LeftHunkSize: leftHunk, + RightHunkSize: righHunk, + } +} + +// escape a line's content or return
<br> needed for copy/paste purposes +func getLineContent(content string, locale translation.Locale) DiffInline { + if len(content) > 0 { + return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content)), locale) + } + return DiffInline{EscapeStatus: &charset.EscapeStatus{}, Content: "<br>"}
+} + +// DiffSection represents a section of a DiffFile. +type DiffSection struct { + file *DiffFile + FileName string + Name string + Lines []*DiffLine +} + +var ( + addedCodePrefix = []byte(`<span class="added-code">`) + removedCodePrefix = []byte(`<span class="removed-code">`) + codeTagSuffix = []byte(`</span>`) +) + +func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string { + buf := bytes.NewBuffer(nil) + // restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary + for _, tag := range lineWrapperTags { + buf.WriteString(tag) + } + for _, diff := range diffs { + switch { + case diff.Type == diffmatchpatch.DiffEqual: + buf.WriteString(diff.Text) + case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd: + buf.Write(addedCodePrefix) + buf.WriteString(diff.Text) + buf.Write(codeTagSuffix) + case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel: + buf.Write(removedCodePrefix) + buf.WriteString(diff.Text) + buf.Write(codeTagSuffix) + } + } + for range lineWrapperTags { + buf.WriteString("</span>") + } + return buf.String() +} + +// GetLine gets a specific line by type (add or del) and file line number +func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine { + var ( + difference = 0 + addCount = 0 + delCount = 0 + matchDiffLine *DiffLine + ) + +LOOP: + for _, diffLine := range diffSection.Lines { + switch diffLine.Type { + case DiffLineAdd: + addCount++ + case DiffLineDel: + delCount++ + default: + if matchDiffLine != nil { + break LOOP + } + difference = diffLine.RightIdx - diffLine.LeftIdx + addCount = 0 + delCount = 0 + } + + switch lineType { + case DiffLineDel: + if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference { + matchDiffLine = diffLine + } + case DiffLineAdd: + if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference { + matchDiffLine = diffLine + } + } + } + + if addCount == delCount { + return matchDiffLine + } + return nil +} + +var diffMatchPatch = diffmatchpatch.New() + +func init() { +
diffMatchPatch.DiffEditCost = 100 +} + +// DiffInline is a struct that has a content and escape status +type DiffInline struct { + EscapeStatus *charset.EscapeStatus + Content template.HTML +} + +// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped +func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline { + status, content := charset.EscapeControlHTML(s, locale, charset.DiffContext) + return DiffInline{EscapeStatus: status, Content: content} +} + +// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped +func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline { + highlighted, _ := highlight.Code(fileName, language, code) + status, content := charset.EscapeControlHTML(highlighted, locale, charset.DiffContext) + return DiffInline{EscapeStatus: status, Content: content} +} + +// GetComputedInlineDiffFor computes inline diff for the given line. +func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline { + if setting.Git.DisableDiffHighlight { + return getLineContent(diffLine.Content[1:], locale) + } + + var ( + compareDiffLine *DiffLine + diff1 string + diff2 string + ) + + language := "" + if diffSection.file != nil { + language = diffSection.file.Language + } + + // try to find equivalent diff line. 
ignore, otherwise + switch diffLine.Type { + case DiffLineSection: + return getLineContent(diffLine.Content[1:], locale) + case DiffLineAdd: + compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx) + if compareDiffLine == nil { + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) + } + diff1 = compareDiffLine.Content + diff2 = diffLine.Content + case DiffLineDel: + compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) + if compareDiffLine == nil { + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) + } + diff1 = diffLine.Content + diff2 = compareDiffLine.Content + default: + if strings.IndexByte(" +-", diffLine.Content[0]) > -1 { + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) + } + return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content, locale) + } + + hcd := NewHighlightCodeDiff() + diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:]) + // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back + // if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)" + diffHTML := diffToHTML(nil, diffRecord, diffLine.Type) + return DiffInlineWithUnicodeEscape(template.HTML(diffHTML), locale) +} + +// DiffFile represents a file diff. 
+type DiffFile struct { + Name string + NameHash string + OldName string + Index int + Addition, Deletion int + Type DiffFileType + IsCreated bool + IsDeleted bool + IsBin bool + IsLFSFile bool + IsRenamed bool + IsAmbiguous bool + IsSubmodule bool + Sections []*DiffSection + IsIncomplete bool + IsIncompleteLineTooLong bool + IsProtected bool + IsGenerated bool + IsVendored bool + IsViewed bool // User specific + HasChangedSinceLastReview bool // User specific + Language string + Mode string + OldMode string +} + +// GetType returns type of diff file. +func (diffFile *DiffFile) GetType() int { + return int(diffFile.Type) +} + +// GetTailSection creates a fake DiffLineSection if the last section is not the end of the file +func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection { + if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile { + return nil + } + leftCommit, err := gitRepo.GetCommit(leftCommitID) + if err != nil { + return nil + } + rightCommit, err := gitRepo.GetCommit(rightCommitID) + if err != nil { + return nil + } + lastSection := diffFile.Sections[len(diffFile.Sections)-1] + lastLine := lastSection.Lines[len(lastSection.Lines)-1] + leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name) + rightLineCount := getCommitFileLineCount(rightCommit, diffFile.Name) + if leftLineCount <= lastLine.LeftIdx || rightLineCount <= lastLine.RightIdx { + return nil + } + tailDiffLine := &DiffLine{ + Type: DiffLineSection, + Content: " ", + SectionInfo: &DiffLineSectionInfo{ + Path: diffFile.Name, + LastLeftIdx: lastLine.LeftIdx, + LastRightIdx: lastLine.RightIdx, + LeftIdx: leftLineCount, + RightIdx: rightLineCount, + }, + } + tailSection := &DiffSection{FileName: diffFile.Name, Lines: []*DiffLine{tailDiffLine}} + return tailSection +} + +// GetDiffFileName returns the name of the diff file, or its old name in case it was deleted +func (diffFile 
*DiffFile) GetDiffFileName() string { + if diffFile.Name == "" { + return diffFile.OldName + } + return diffFile.Name +} + +func (diffFile *DiffFile) ShouldBeHidden() bool { + return diffFile.IsGenerated || diffFile.IsViewed +} + +func (diffFile *DiffFile) ModeTranslationKey(mode string) string { + switch mode { + case "040000": + return "git.filemode.directory" + case "100644": + return "git.filemode.normal_file" + case "100755": + return "git.filemode.executable_file" + case "120000": + return "git.filemode.symbolic_link" + case "160000": + return "git.filemode.submodule" + default: + return mode + } +} + +func getCommitFileLineCount(commit *git.Commit, filePath string) int { + blob, err := commit.GetBlobByPath(filePath) + if err != nil { + return 0 + } + lineCount, err := blob.GetBlobLineCount() + if err != nil { + return 0 + } + return lineCount +} + +// Diff represents a difference between two git trees. +type Diff struct { + Start, End string + NumFiles int + TotalAddition, TotalDeletion int + Files []*DiffFile + IsIncomplete bool + NumViewedFiles int // user-specific +} + +// LoadComments loads comments into each line +func (diff *Diff) LoadComments(ctx context.Context, issue *issues_model.Issue, currentUser *user_model.User, showOutdatedComments bool) error { + allConversations, err := issues_model.FetchCodeConversations(ctx, issue, currentUser, showOutdatedComments) + if err != nil { + return err + } + for _, file := range diff.Files { + if lineCommits, ok := allConversations[file.Name]; ok { + for _, section := range file.Sections { + for _, line := range section.Lines { + if conversations, ok := lineCommits[int64(line.LeftIdx*-1)]; ok { + line.Conversations = append(line.Conversations, conversations...) + } + if comments, ok := lineCommits[int64(line.RightIdx)]; ok { + line.Conversations = append(line.Conversations, comments...) 
+ } + } + } + } + } + return nil +} + +const cmdDiffHead = "diff --git " + +// ParsePatch builds a Diff object from a io.Reader and some parameters. +func ParsePatch(ctx context.Context, maxLines, maxLineCharacters, maxFiles int, reader io.Reader, skipToFile string) (*Diff, error) { + log.Debug("ParsePatch(%d, %d, %d, ..., %s)", maxLines, maxLineCharacters, maxFiles, skipToFile) + var curFile *DiffFile + + skipping := skipToFile != "" + + diff := &Diff{Files: make([]*DiffFile, 0)} + + sb := strings.Builder{} + + // OK let's set a reasonable buffer size. + // This should be at least the size of maxLineCharacters or 4096 whichever is larger. + readerSize := maxLineCharacters + if readerSize < 4096 { + readerSize = 4096 + } + + input := bufio.NewReaderSize(reader, readerSize) + line, err := input.ReadString('\n') + if err != nil { + if err == io.EOF { + return diff, nil + } + return diff, err + } + + prepareValue := func(s, p string) string { + return strings.TrimSpace(strings.TrimPrefix(s, p)) + } + +parsingLoop: + for { + // 1. A patch file always begins with `diff --git ` + `a/path b/path` (possibly quoted) + // if it does not we have bad input! + if !strings.HasPrefix(line, cmdDiffHead) { + return diff, fmt.Errorf("invalid first file line: %s", line) + } + + if maxFiles > -1 && len(diff.Files) >= maxFiles { + lastFile := createDiffFile(diff, line) + diff.End = lastFile.Name + diff.IsIncomplete = true + _, err := io.Copy(io.Discard, reader) + if err != nil { + // By the definition of io.Copy this never returns io.EOF + return diff, fmt.Errorf("error during io.Copy: %w", err) + } + break parsingLoop + } + + curFile = createDiffFile(diff, line) + if skipping { + if curFile.Name != skipToFile { + line, err = skipToNextDiffHead(input) + if err != nil { + if err == io.EOF { + return diff, nil + } + return diff, err + } + continue + } + skipping = false + } + + diff.Files = append(diff.Files, curFile) + + // 2. 
It is followed by one or more extended header lines: + // + // old mode + // new mode + // deleted file mode + // new file mode + // copy from + // copy to + // rename from + // rename to + // similarity index + // dissimilarity index + // index .. + // + // * 6-digit octal numbers including the file type and file permission bits. + // * does not include the a/ and b/ prefixes + // * percentage of unchanged lines for similarity, percentage of changed + // lines dissimilarity as integer rounded down with terminal %. 100% => equal files. + // * The index line includes the blob object names before and after the change. + // The is included if the file mode does not change; otherwise, separate + // lines indicate the old and the new mode. + // 3. Following this header the "standard unified" diff format header may be encountered: (but not for every case...) + // + // --- a/ + // +++ b/ + // + // With multiple hunks + // + // @@ @@ + // +added line + // -removed line + // unchanged line + // + // 4. Binary files get: + // + // Binary files a/ and b/ differ + // + // but one of a/ and b/ could be /dev/null. 
+ curFileLoop: + for { + line, err = input.ReadString('\n') + if err != nil { + if err != io.EOF { + return diff, err + } + break parsingLoop + } + + switch { + case strings.HasPrefix(line, cmdDiffHead): + break curFileLoop + case strings.HasPrefix(line, "old mode ") || + strings.HasPrefix(line, "new mode "): + + if strings.HasPrefix(line, "old mode ") { + curFile.OldMode = prepareValue(line, "old mode ") + } + if strings.HasPrefix(line, "new mode ") { + curFile.Mode = prepareValue(line, "new mode ") + } + + if strings.HasSuffix(line, " 160000\n") { + curFile.IsSubmodule = true + } + case strings.HasPrefix(line, "rename from "): + curFile.IsRenamed = true + curFile.Type = DiffFileRename + if curFile.IsAmbiguous { + curFile.OldName = prepareValue(line, "rename from ") + } + case strings.HasPrefix(line, "rename to "): + curFile.IsRenamed = true + curFile.Type = DiffFileRename + if curFile.IsAmbiguous { + curFile.Name = prepareValue(line, "rename to ") + curFile.IsAmbiguous = false + } + case strings.HasPrefix(line, "copy from "): + curFile.IsRenamed = true + curFile.Type = DiffFileCopy + if curFile.IsAmbiguous { + curFile.OldName = prepareValue(line, "copy from ") + } + case strings.HasPrefix(line, "copy to "): + curFile.IsRenamed = true + curFile.Type = DiffFileCopy + if curFile.IsAmbiguous { + curFile.Name = prepareValue(line, "copy to ") + curFile.IsAmbiguous = false + } + case strings.HasPrefix(line, "new file"): + curFile.Type = DiffFileAdd + curFile.IsCreated = true + if strings.HasPrefix(line, "new file mode ") { + curFile.Mode = prepareValue(line, "new file mode ") + } + if strings.HasSuffix(line, " 160000\n") { + curFile.IsSubmodule = true + } + case strings.HasPrefix(line, "deleted"): + curFile.Type = DiffFileDel + curFile.IsDeleted = true + if strings.HasSuffix(line, " 160000\n") { + curFile.IsSubmodule = true + } + case strings.HasPrefix(line, "index"): + if strings.HasSuffix(line, " 160000\n") { + curFile.IsSubmodule = true + } + case 
strings.HasPrefix(line, "similarity index 100%"): + curFile.Type = DiffFileRename + case strings.HasPrefix(line, "Binary"): + curFile.IsBin = true + case strings.HasPrefix(line, "--- "): + // Handle ambiguous filenames + if curFile.IsAmbiguous { + // The shortest string that can end up here is: + // "--- a\t\n" without the quotes. + // This line has a len() of 7 but doesn't contain a oldName. + // So the amount that the line need is at least 8 or more. + // The code will otherwise panic for a out-of-bounds. + if len(line) > 7 && line[4] == 'a' { + curFile.OldName = line[6 : len(line)-1] + if line[len(line)-2] == '\t' { + curFile.OldName = curFile.OldName[:len(curFile.OldName)-1] + } + } else { + curFile.OldName = "" + } + } + // Otherwise do nothing with this line + case strings.HasPrefix(line, "+++ "): + // Handle ambiguous filenames + if curFile.IsAmbiguous { + if len(line) > 6 && line[4] == 'b' { + curFile.Name = line[6 : len(line)-1] + if line[len(line)-2] == '\t' { + curFile.Name = curFile.Name[:len(curFile.Name)-1] + } + if curFile.OldName == "" { + curFile.OldName = curFile.Name + } + } else { + curFile.Name = curFile.OldName + } + curFile.IsAmbiguous = false + } + // Otherwise do nothing with this line, but now switch to parsing hunks + lineBytes, isFragment, err := parseHunks(ctx, curFile, maxLines, maxLineCharacters, input) + diff.TotalAddition += curFile.Addition + diff.TotalDeletion += curFile.Deletion + if err != nil { + if err != io.EOF { + return diff, err + } + break parsingLoop + } + sb.Reset() + _, _ = sb.Write(lineBytes) + for isFragment { + lineBytes, isFragment, err = input.ReadLine() + if err != nil { + // Now by the definition of ReadLine this cannot be io.EOF + return diff, fmt.Errorf("unable to ReadLine: %w", err) + } + _, _ = sb.Write(lineBytes) + } + line = sb.String() + sb.Reset() + + break curFileLoop + } + } + } + + // TODO: There are numerous issues with this: + // - we might want to consider detecting encoding while parsing but... 
+ // - we're likely to fail to get the correct encoding here anyway as we won't have enough information + diffLineTypeBuffers := make(map[DiffLineType]*bytes.Buffer, 3) + diffLineTypeDecoders := make(map[DiffLineType]*encoding.Decoder, 3) + diffLineTypeBuffers[DiffLinePlain] = new(bytes.Buffer) + diffLineTypeBuffers[DiffLineAdd] = new(bytes.Buffer) + diffLineTypeBuffers[DiffLineDel] = new(bytes.Buffer) + for _, f := range diff.Files { + f.NameHash = git.HashFilePathForWebUI(f.Name) + + for _, buffer := range diffLineTypeBuffers { + buffer.Reset() + } + for _, sec := range f.Sections { + for _, l := range sec.Lines { + if l.Type == DiffLineSection { + continue + } + diffLineTypeBuffers[l.Type].WriteString(l.Content[1:]) + diffLineTypeBuffers[l.Type].WriteString("\n") + } + } + for lineType, buffer := range diffLineTypeBuffers { + diffLineTypeDecoders[lineType] = nil + if buffer.Len() == 0 { + continue + } + charsetLabel, err := charset.DetectEncoding(buffer.Bytes()) + if charsetLabel != "UTF-8" && err == nil { + encoding, _ := stdcharset.Lookup(charsetLabel) + if encoding != nil { + diffLineTypeDecoders[lineType] = encoding.NewDecoder() + } + } + } + for _, sec := range f.Sections { + for _, l := range sec.Lines { + decoder := diffLineTypeDecoders[l.Type] + if decoder != nil { + if c, _, err := transform.String(decoder, l.Content[1:]); err == nil { + l.Content = l.Content[0:1] + c + } + } + } + } + } + + diff.NumFiles = len(diff.Files) + return diff, nil +} + +func skipToNextDiffHead(input *bufio.Reader) (line string, err error) { + // need to skip until the next cmdDiffHead + var isFragment, wasFragment bool + var lineBytes []byte + for { + lineBytes, isFragment, err = input.ReadLine() + if err != nil { + return "", err + } + if wasFragment { + wasFragment = isFragment + continue + } + if bytes.HasPrefix(lineBytes, []byte(cmdDiffHead)) { + break + } + wasFragment = isFragment + } + line = string(lineBytes) + if isFragment { + var tail string + tail, err = 
input.ReadString('\n') + if err != nil { + return "", err + } + line += tail + } + return line, err +} + +func parseHunks(ctx context.Context, curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio.Reader) (lineBytes []byte, isFragment bool, err error) { + sb := strings.Builder{} + + var ( + curSection *DiffSection + curFileLinesCount int + curFileLFSPrefix bool + ) + + lastLeftIdx := -1 + leftLine, rightLine := 1, 1 + + for { + for isFragment { + curFile.IsIncomplete = true + curFile.IsIncompleteLineTooLong = true + _, isFragment, err = input.ReadLine() + if err != nil { + // Now by the definition of ReadLine this cannot be io.EOF + return nil, false, fmt.Errorf("unable to ReadLine: %w", err) + } + } + sb.Reset() + lineBytes, isFragment, err = input.ReadLine() + if err != nil { + if err == io.EOF { + return lineBytes, isFragment, err + } + err = fmt.Errorf("unable to ReadLine: %w", err) + return nil, false, err + } + if lineBytes[0] == 'd' { + // End of hunks + return lineBytes, isFragment, err + } + + switch lineBytes[0] { + case '@': + if maxLines > -1 && curFileLinesCount >= maxLines { + curFile.IsIncomplete = true + continue + } + + _, _ = sb.Write(lineBytes) + for isFragment { + // This is very odd indeed - we're in a section header and the line is too long + // This really shouldn't happen... 
+ lineBytes, isFragment, err = input.ReadLine() + if err != nil { + // Now by the definition of ReadLine this cannot be io.EOF + return nil, false, fmt.Errorf("unable to ReadLine: %w", err) + } + _, _ = sb.Write(lineBytes) + } + line := sb.String() + + // Create a new section to represent this hunk + curSection = &DiffSection{file: curFile} + lastLeftIdx = -1 + curFile.Sections = append(curFile.Sections, curSection) + + lineSectionInfo := getDiffLineSectionInfo(curFile.Name, line, leftLine-1, rightLine-1) + diffLine := &DiffLine{ + Type: DiffLineSection, + Content: line, + SectionInfo: lineSectionInfo, + } + curSection.Lines = append(curSection.Lines, diffLine) + curSection.FileName = curFile.Name + // update line number. + leftLine = lineSectionInfo.LeftIdx + rightLine = lineSectionInfo.RightIdx + continue + case '\\': + if maxLines > -1 && curFileLinesCount >= maxLines { + curFile.IsIncomplete = true + continue + } + // This is used only to indicate that the current file does not have a terminal newline + if !bytes.Equal(lineBytes, []byte("\\ No newline at end of file")) { + return nil, false, fmt.Errorf("unexpected line in hunk: %s", string(lineBytes)) + } + // Technically this should be the end the file! 
+ // FIXME: we should be putting a marker at the end of the file if there is no terminal new line + continue + case '+': + curFileLinesCount++ + curFile.Addition++ + if maxLines > -1 && curFileLinesCount >= maxLines { + curFile.IsIncomplete = true + continue + } + diffLine := &DiffLine{Type: DiffLineAdd, RightIdx: rightLine, Match: -1} + rightLine++ + if curSection == nil { + // Create a new section to represent this hunk + curSection = &DiffSection{file: curFile} + curFile.Sections = append(curFile.Sections, curSection) + lastLeftIdx = -1 + } + if lastLeftIdx > -1 { + diffLine.Match = lastLeftIdx + curSection.Lines[lastLeftIdx].Match = len(curSection.Lines) + lastLeftIdx++ + if lastLeftIdx >= len(curSection.Lines) || curSection.Lines[lastLeftIdx].Type != DiffLineDel { + lastLeftIdx = -1 + } + } + curSection.Lines = append(curSection.Lines, diffLine) + case '-': + curFileLinesCount++ + curFile.Deletion++ + if maxLines > -1 && curFileLinesCount >= maxLines { + curFile.IsIncomplete = true + continue + } + diffLine := &DiffLine{Type: DiffLineDel, LeftIdx: leftLine, Match: -1} + if leftLine > 0 { + leftLine++ + } + if curSection == nil { + // Create a new section to represent this hunk + curSection = &DiffSection{file: curFile} + curFile.Sections = append(curFile.Sections, curSection) + lastLeftIdx = -1 + } + if len(curSection.Lines) == 0 || curSection.Lines[len(curSection.Lines)-1].Type != DiffLineDel { + lastLeftIdx = len(curSection.Lines) + } + curSection.Lines = append(curSection.Lines, diffLine) + case ' ': + curFileLinesCount++ + if maxLines > -1 && curFileLinesCount >= maxLines { + curFile.IsIncomplete = true + continue + } + diffLine := &DiffLine{Type: DiffLinePlain, LeftIdx: leftLine, RightIdx: rightLine} + leftLine++ + rightLine++ + lastLeftIdx = -1 + if curSection == nil { + // Create a new section to represent this hunk + curSection = &DiffSection{file: curFile} + curFile.Sections = append(curFile.Sections, curSection) + } + curSection.Lines = 
append(curSection.Lines, diffLine) + default: + // This is unexpected + return nil, false, fmt.Errorf("unexpected line in hunk: %s", string(lineBytes)) + } + + line := string(lineBytes) + if isFragment { + curFile.IsIncomplete = true + curFile.IsIncompleteLineTooLong = true + for isFragment { + lineBytes, isFragment, err = input.ReadLine() + if err != nil { + // Now by the definition of ReadLine this cannot be io.EOF + return lineBytes, isFragment, fmt.Errorf("unable to ReadLine: %w", err) + } + } + } + if len(line) > maxLineCharacters { + curFile.IsIncomplete = true + curFile.IsIncompleteLineTooLong = true + line = line[:maxLineCharacters] + } + curSection.Lines[len(curSection.Lines)-1].Content = line + + // handle LFS + if line[1:] == lfs.MetaFileIdentifier { + curFileLFSPrefix = true + } else if curFileLFSPrefix && strings.HasPrefix(line[1:], lfs.MetaFileOidPrefix) { + oid := strings.TrimPrefix(line[1:], lfs.MetaFileOidPrefix) + if len(oid) == 64 { + m := &git_model.LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}} + count, err := db.CountByBean(ctx, m) + + if err == nil && count > 0 { + curFile.IsBin = true + curFile.IsLFSFile = true + curSection.Lines = nil + lastLeftIdx = -1 + } + } + } + } +} + +func createDiffFile(diff *Diff, line string) *DiffFile { + // The a/ and b/ filenames are the same unless rename/copy is involved. + // Especially, even for a creation or a deletion, /dev/null is not used + // in place of the a/ or b/ filenames. + // + // When rename/copy is involved, file1 and file2 show the name of the + // source file of the rename/copy and the name of the file that rename/copy + // produces, respectively. + // + // Path names are quoted if necessary. + // + // This means that you should always be able to determine the file name even when there + // there is potential ambiguity... 
+ // + // but we can be simpler with our heuristics by just forcing git to prefix things nicely + curFile := &DiffFile{ + Index: len(diff.Files) + 1, + Type: DiffFileChange, + Sections: make([]*DiffSection, 0, 10), + } + + rd := strings.NewReader(line[len(cmdDiffHead):] + " ") + curFile.Type = DiffFileChange + var oldNameAmbiguity, newNameAmbiguity bool + + curFile.OldName, oldNameAmbiguity = readFileName(rd) + curFile.Name, newNameAmbiguity = readFileName(rd) + if oldNameAmbiguity && newNameAmbiguity { + curFile.IsAmbiguous = true + // OK we should bet that the oldName and the newName are the same if they can be made to be same + // So we need to start again ... + if (len(line)-len(cmdDiffHead)-1)%2 == 0 { + // diff --git a/b b/b b/b b/b b/b b/b + // + midpoint := (len(line) + len(cmdDiffHead) - 1) / 2 + newl, old := line[len(cmdDiffHead):midpoint], line[midpoint+1:] + if len(newl) > 2 && len(old) > 2 && newl[2:] == old[2:] { + curFile.OldName = old[2:] + curFile.Name = old[2:] + } + } + } + + curFile.IsRenamed = curFile.Name != curFile.OldName + return curFile +} + +func readFileName(rd *strings.Reader) (string, bool) { + ambiguity := false + var name string + char, _ := rd.ReadByte() + _ = rd.UnreadByte() + if char == '"' { + _, _ = fmt.Fscanf(rd, "%q ", &name) + if len(name) == 0 { + log.Error("Reader has no file name: reader=%+v", rd) + return "", true + } + + if name[0] == '\\' { + name = name[1:] + } + } else { + // This technique is potentially ambiguous it may not be possible to uniquely identify the filenames from the diff line alone + ambiguity = true + _, _ = fmt.Fscanf(rd, "%s ", &name) + char, _ := rd.ReadByte() + _ = rd.UnreadByte() + for !(char == 0 || char == '"' || char == 'b') { + var suffix string + _, _ = fmt.Fscanf(rd, "%s ", &suffix) + name += " " + suffix + char, _ = rd.ReadByte() + _ = rd.UnreadByte() + } + } + if len(name) < 2 { + log.Error("Unable to determine name from reader: reader=%+v", rd) + return "", true + } + return name[2:], 
ambiguity +} + +// DiffOptions represents the options for a DiffRange +type DiffOptions struct { + BeforeCommitID string + AfterCommitID string + SkipTo string + MaxLines int + MaxLineCharacters int + MaxFiles int + WhitespaceBehavior git.TrustedCmdArgs + DirectComparison bool +} + +// GetDiff builds a Diff between two commits of a repository. +// Passing the empty string as beforeCommitID returns a diff from the parent commit. +// The whitespaceBehavior is either an empty string or a git flag +func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { + repoPath := gitRepo.Path + + commit, err := gitRepo.GetCommit(opts.AfterCommitID) + if err != nil { + return nil, err + } + + cmdDiff := git.NewCommand(gitRepo.Ctx) + objectFormat, err := gitRepo.GetObjectFormat() + if err != nil { + return nil, err + } + + if (len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String()) && commit.ParentCount() == 0 { + cmdDiff.AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M"). + AddArguments(opts.WhitespaceBehavior...). + AddDynamicArguments(objectFormat.EmptyTree().String()). + AddDynamicArguments(opts.AfterCommitID) + } else { + actualBeforeCommitID := opts.BeforeCommitID + if len(actualBeforeCommitID) == 0 { + parentCommit, _ := commit.Parent(0) + actualBeforeCommitID = parentCommit.ID.String() + } + + cmdDiff.AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M"). + AddArguments(opts.WhitespaceBehavior...). 
+ AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID) + opts.BeforeCommitID = actualBeforeCommitID + } + + // In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file + // so if we are using at least this version of git we don't have to tell ParsePatch to do + // the skipping for us + parsePatchSkipToFile := opts.SkipTo + if opts.SkipTo != "" && git.CheckGitVersionAtLeast("2.31") == nil { + cmdDiff.AddOptionFormat("--skip-to=%s", opts.SkipTo) + parsePatchSkipToFile = "" + } + + cmdDiff.AddDashesAndList(files...) + + reader, writer := io.Pipe() + defer func() { + _ = reader.Close() + _ = writer.Close() + }() + + go func() { + stderr := &bytes.Buffer{} + cmdDiff.SetDescription(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath)) + if err := cmdDiff.Run(&git.RunOpts{ + Timeout: time.Duration(setting.Git.Timeout.Default) * time.Second, + Dir: repoPath, + Stdout: writer, + Stderr: stderr, + }); err != nil { + log.Error("error during GetDiff(git diff dir: %s): %v, stderr: %s", repoPath, err, stderr.String()) + } + + _ = writer.Close() + }() + + diff, err := ParsePatch(ctx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile) + if err != nil { + return nil, fmt.Errorf("unable to ParsePatch: %w", err) + } + diff.Start = opts.SkipTo + + checker, err := gitRepo.GitAttributeChecker(opts.AfterCommitID, git.LinguistAttributes...) 
+ if err != nil { + return nil, fmt.Errorf("unable to GitAttributeChecker: %w", err) + } + defer checker.Close() + + for _, diffFile := range diff.Files { + gotVendor := false + gotGenerated := false + + attrs, err := checker.CheckPath(diffFile.Name) + if err != nil { + log.Error("checker.CheckPath(%s) failed: %v", diffFile.Name, err) + } else { + vendored := attrs["linguist-vendored"].Bool() + diffFile.IsVendored = vendored.Value() + gotVendor = vendored.Has() + + generated := attrs["linguist-generated"].Bool() + diffFile.IsGenerated = generated.Value() + gotGenerated = generated.Has() + + diffFile.Language = cmp.Or( + attrs["linguist-language"].String(), + attrs["gitlab-language"].Prefix(), + ) + } + + if !gotVendor { + diffFile.IsVendored = analyze.IsVendor(diffFile.Name) + } + if !gotGenerated { + diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name) + } + + tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID) + if tailSection != nil { + diffFile.Sections = append(diffFile.Sections, tailSection) + } + } + + separator := "..." + if opts.DirectComparison { + separator = ".." + } + + diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID} + if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String() { + diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID} + } + diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) + if err != nil && strings.Contains(err.Error(), "no merge base") { + // git >= 2.28 now returns an error if base and head have become unrelated. + // previously it would return the results of git diff --shortstat base head so let's try that... + diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID} + diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) 
+ } + if err != nil { + return nil, err + } + + return diff, nil +} + +type PullDiffStats struct { + TotalAddition, TotalDeletion int +} + +// GetPullDiffStats +func GetPullDiffStats(gitRepo *git.Repository, opts *DiffOptions) (*PullDiffStats, error) { + repoPath := gitRepo.Path + + diff := &PullDiffStats{} + + separator := "..." + if opts.DirectComparison { + separator = ".." + } + + objectFormat, err := gitRepo.GetObjectFormat() + if err != nil { + return nil, err + } + + diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID} + if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String() { + diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID} + } + + _, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) + if err != nil && strings.Contains(err.Error(), "no merge base") { + // git >= 2.28 now returns an error if base and head have become unrelated. + // previously it would return the results of git diff --shortstat base head so let's try that... + diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID} + _, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...) + } + if err != nil { + return nil, err + } + + return diff, nil +} + +// SyncAndGetUserSpecificDiff is like GetDiff, except that user specific data such as which files the given user has already viewed on the given PR will also be set +// Additionally, the database asynchronously is updated if files have changed since the last review +func SyncAndGetUserSpecificDiff(ctx context.Context, userID int64, pull *issues_model.PullRequest, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { + diff, err := GetDiff(ctx, gitRepo, opts, files...) 
+ if err != nil { + return nil, err + } + review, err := pull_model.GetNewestReviewState(ctx, userID, pull.ID) + if err != nil || review == nil || review.UpdatedFiles == nil { + return diff, err + } + + latestCommit := opts.AfterCommitID + if latestCommit == "" { + latestCommit = pull.HeadBranch // opts.AfterCommitID is preferred because it handles PRs from forks correctly and the branch name doesn't + } + + changedFiles, err := gitRepo.GetFilesChangedBetween(review.CommitSHA, latestCommit) + // There are way too many possible errors. + // Examples are various git errors such as the commit the review was based on was gc'ed and hence doesn't exist anymore as well as unrecoverable errors where we should serve a 500 response + // Due to the current architecture and physical limitation of needing to compare explicit error messages, we can only choose one approach without the code getting ugly + // For SOME of the errors such as the gc'ed commit, it would be best to mark all files as changed + // But as that does not work for all potential errors, we simply mark all files as unchanged and drop the error which always works, even if not as good as possible + if err != nil { + log.Error("Could not get changed files between %s and %s for pull request %d in repo with path %s. Assuming no changes. 
Error: %w", review.CommitSHA, latestCommit, pull.Index, gitRepo.Path, err) + } + + filesChangedSinceLastDiff := make(map[string]pull_model.ViewedState) +outer: + for _, diffFile := range diff.Files { + fileViewedState := review.UpdatedFiles[diffFile.GetDiffFileName()] + + // Check whether it was previously detected that the file has changed since the last review + if fileViewedState == pull_model.HasChanged { + diffFile.HasChangedSinceLastReview = true + continue + } + + filename := diffFile.GetDiffFileName() + + // Check explicitly whether the file has changed since the last review + for _, changedFile := range changedFiles { + diffFile.HasChangedSinceLastReview = filename == changedFile + if diffFile.HasChangedSinceLastReview { + filesChangedSinceLastDiff[filename] = pull_model.HasChanged + continue outer // We don't want to check if the file is viewed here as that would fold the file, which is in this case unwanted + } + } + // Check whether the file has already been viewed + if fileViewedState == pull_model.Viewed { + diffFile.IsViewed = true + diff.NumViewedFiles++ + } + } + + // Explicitly store files that have changed in the database, if any is present at all. + // This has the benefit that the "Has Changed" attribute will be present as long as the user does not explicitly mark this file as viewed, so it will even survive a page reload after marking another file as viewed. + // On the other hand, this means that even if a commit reverting an unseen change is committed, the file will still be seen as changed. 
+ if len(filesChangedSinceLastDiff) > 0 { + err := pull_model.UpdateReviewState(ctx, review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff) + if err != nil { + log.Warn("Could not update review for user %d, pull %d, commit %s and the changed files %v: %v", review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff, err) + return nil, err + } + } + + return diff, nil +} + +// CommentAsDiff returns c.Patch as *Diff +func CommentAsDiff(ctx context.Context, c *issues_model.Comment) (*Diff, error) { + diff, err := ParsePatch(ctx, setting.Git.MaxGitDiffLines, + setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch), "") + if err != nil { + log.Error("Unable to parse patch: %v", err) + return nil, err + } + if len(diff.Files) == 0 { + return nil, fmt.Errorf("no file found for comment ID: %d", c.ID) + } + secs := diff.Files[0].Sections + if len(secs) == 0 { + return nil, fmt.Errorf("no sections found for comment ID: %d", c.ID) + } + return diff, nil +} + +// CommentMustAsDiff executes AsDiff and logs the error instead of returning +func CommentMustAsDiff(ctx context.Context, c *issues_model.Comment) *Diff { + if c == nil { + return nil + } + defer func() { + if err := recover(); err != nil { + log.Error("PANIC whilst retrieving diff for comment[%d] Error: %v\nStack: %s", c.ID, err, log.Stack(2)) + } + }() + diff, err := CommentAsDiff(ctx, c) + if err != nil { + log.Warn("CommentMustAsDiff: %v", err) + } + return diff +} + +// GetWhitespaceFlag returns git diff flag for treating whitespaces +func GetWhitespaceFlag(whitespaceBehavior string) git.TrustedCmdArgs { + whitespaceFlags := map[string]git.TrustedCmdArgs{ + "ignore-all": {"-w"}, + "ignore-change": {"-b"}, + "ignore-eol": {"--ignore-space-at-eol"}, + "show-all": nil, + } + + if flag, ok := whitespaceFlags[whitespaceBehavior]; ok { + return flag + } + log.Warn("unknown whitespace behavior: %q, default to 'show-all'", whitespaceBehavior) + return 
nil +} diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go new file mode 100644 index 0000000..f2c099d --- /dev/null +++ b/services/gitdiff/gitdiff_test.go @@ -0,0 +1,671 @@ +// Copyright 2014 The Gogs Authors. All rights reserved. +// Copyright 2019 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package gitdiff + +import ( + "strconv" + "strings" + "testing" + + "code.gitea.io/gitea/models/db" + issues_model "code.gitea.io/gitea/models/issues" + "code.gitea.io/gitea/models/unittest" + user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/json" + "code.gitea.io/gitea/modules/setting" + + dmp "github.com/sergi/go-diff/diffmatchpatch" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDiffToHTML(t *testing.T) { + assert.Equal(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ + {Type: dmp.DiffEqual, Text: "foo "}, + {Type: dmp.DiffInsert, Text: "bar"}, + {Type: dmp.DiffDelete, Text: " baz"}, + {Type: dmp.DiffEqual, Text: " biz"}, + }, DiffLineAdd)) + + assert.Equal(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ + {Type: dmp.DiffEqual, Text: "foo "}, + {Type: dmp.DiffDelete, Text: "bar"}, + {Type: dmp.DiffInsert, Text: " baz"}, + {Type: dmp.DiffEqual, Text: " biz"}, + }, DiffLineDel)) +} + +func TestParsePatch_skipTo(t *testing.T) { + type testcase struct { + name string + gitdiff string + wantErr bool + addition int + deletion int + oldFilename string + filename string + skipTo string + } + tests := []testcase{ + { + name: "readme.md2readme.md", + gitdiff: `diff --git "a/A \\ B" "b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +diff --git "\\a/README.md" "\\b/README.md" +--- "\\a/README.md" ++++ "\\b/README.md" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ 
cut off ++ cut off +`, + addition: 4, + deletion: 1, + filename: "README.md", + oldFilename: "README.md", + skipTo: "README.md", + }, + { + name: "A \\ B", + gitdiff: `diff --git "a/A \\ B" "b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off`, + addition: 4, + deletion: 1, + filename: "A \\ B", + oldFilename: "A \\ B", + skipTo: "A \\ B", + }, + { + name: "A \\ B", + gitdiff: `diff --git "\\a/README.md" "\\b/README.md" +--- "\\a/README.md" ++++ "\\b/README.md" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +diff --git "a/A \\ B" "b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off`, + addition: 4, + deletion: 1, + filename: "A \\ B", + oldFilename: "A \\ B", + skipTo: "A \\ B", + }, + { + name: "readme.md2readme.md", + gitdiff: `diff --git "a/A \\ B" "b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +diff --git "a/A \\ B" "b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +diff --git "\\a/README.md" "\\b/README.md" +--- "\\a/README.md" ++++ "\\b/README.md" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +`, + addition: 4, + deletion: 1, + filename: "README.md", + oldFilename: "README.md", + skipTo: "README.md", + }, + } + for _, testcase := range tests { + t.Run(testcase.name, func(t *testing.T) { + got, err := ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff), testcase.skipTo) + if (err != 
nil) != testcase.wantErr { + t.Errorf("ParsePatch(%q) error = %v, wantErr %v", testcase.name, err, testcase.wantErr) + return + } + + gotMarshaled, _ := json.MarshalIndent(got, "", " ") + if got.NumFiles != 1 { + t.Errorf("ParsePath(%q) did not receive 1 file:\n%s", testcase.name, string(gotMarshaled)) + return + } + if got.TotalAddition != testcase.addition { + t.Errorf("ParsePath(%q) does not have correct totalAddition %d, wanted %d", testcase.name, got.TotalAddition, testcase.addition) + } + if got.TotalDeletion != testcase.deletion { + t.Errorf("ParsePath(%q) did not have correct totalDeletion %d, wanted %d", testcase.name, got.TotalDeletion, testcase.deletion) + } + file := got.Files[0] + if file.Addition != testcase.addition { + t.Errorf("ParsePath(%q) does not have correct file addition %d, wanted %d", testcase.name, file.Addition, testcase.addition) + } + if file.Deletion != testcase.deletion { + t.Errorf("ParsePath(%q) did not have correct file deletion %d, wanted %d", testcase.name, file.Deletion, testcase.deletion) + } + if file.OldName != testcase.oldFilename { + t.Errorf("ParsePath(%q) did not have correct OldName %q, wanted %q", testcase.name, file.OldName, testcase.oldFilename) + } + if file.Name != testcase.filename { + t.Errorf("ParsePath(%q) did not have correct Name %q, wanted %q", testcase.name, file.Name, testcase.filename) + } + }) + } +} + +func TestParsePatch_singlefile(t *testing.T) { + type testcase struct { + name string + gitdiff string + wantErr bool + addition int + deletion int + oldFilename string + filename string + } + + tests := []testcase{ + { + name: "readme.md2readme.md", + gitdiff: `diff --git "\\a/README.md" "\\b/README.md" +--- "\\a/README.md" ++++ "\\b/README.md" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off +`, + addition: 4, + deletion: 1, + filename: "README.md", + oldFilename: "README.md", + }, + { + name: "A \\ B", + gitdiff: `diff --git "a/A \\ B" 
"b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off`, + addition: 4, + deletion: 1, + filename: "A \\ B", + oldFilename: "A \\ B", + }, + { + name: "really weird filename", + gitdiff: `diff --git "\\a/a b/file b/a a/file" "\\b/a b/file b/a a/file" +index d2186f1..f5c8ed2 100644 +--- "\\a/a b/file b/a a/file" ` + ` ++++ "\\b/a b/file b/a a/file" ` + ` +@@ -1,3 +1,2 @@ + Create a weird file. + ` + ` +-and what does diff do here? +\ No newline at end of file`, + addition: 0, + deletion: 1, + filename: "a b/file b/a a/file", + oldFilename: "a b/file b/a a/file", + }, + { + name: "delete file with blanks", + gitdiff: `diff --git "\\a/file with blanks" "\\b/file with blanks" +deleted file mode 100644 +index 898651a..0000000 +--- "\\a/file with blanks" ` + ` ++++ /dev/null +@@ -1,5 +0,0 @@ +-a blank file +- +-has a couple o line +- +-the 5th line is the last +`, + addition: 0, + deletion: 5, + filename: "file with blanks", + oldFilename: "file with blanks", + }, + { + name: "rename a—as", + gitdiff: `diff --git "a/\360\243\220\265b\342\200\240vs" "b/a\342\200\224as" +similarity index 100% +rename from "\360\243\220\265b\342\200\240vs" +rename to "a\342\200\224as" +`, + addition: 0, + deletion: 0, + oldFilename: "𣐵b†vs", + filename: "a—as", + }, + { + name: "rename with spaces", + gitdiff: `diff --git "\\a/a b/file b/a a/file" "\\b/a b/a a/file b/b file" +similarity index 100% +rename from a b/file b/a a/file +rename to a b/a a/file b/b file +`, + oldFilename: "a b/file b/a a/file", + filename: "a b/a a/file b/b file", + }, + { + name: "ambiguous deleted", + gitdiff: `diff --git a/b b/b b/b b/b +deleted file mode 100644 +index 92e798b..0000000 +--- a/b b/b` + "\t" + ` ++++ /dev/null +@@ -1 +0,0 @@ +-b b/b +`, + oldFilename: "b b/b", + filename: "b b/b", + addition: 0, + deletion: 1, + }, + { + name: "ambiguous addition", + gitdiff: `diff --git a/b b/b b/b b/b +new 
file mode 100644 +index 0000000..92e798b +--- /dev/null ++++ b/b b/b` + "\t" + ` +@@ -0,0 +1 @@ ++b b/b +`, + oldFilename: "b b/b", + filename: "b b/b", + addition: 1, + deletion: 0, + }, + { + name: "rename", + gitdiff: `diff --git a/b b/b b/b b/b b/b b/b +similarity index 100% +rename from b b/b b/b b/b b/b +rename to b +`, + oldFilename: "b b/b b/b b/b b/b", + filename: "b", + }, + { + name: "ambiguous 1", + gitdiff: `diff --git a/b b/b b/b b/b b/b b/b +similarity index 100% +rename from b b/b b/b b/b b/b +rename to b +`, + oldFilename: "b b/b b/b b/b b/b", + filename: "b", + }, + { + name: "ambiguous 2", + gitdiff: `diff --git a/b b/b b/b b/b b/b b/b +similarity index 100% +rename from b b/b b/b b/b +rename to b b/b +`, + oldFilename: "b b/b b/b b/b", + filename: "b b/b", + }, + { + name: "minuses-and-pluses", + gitdiff: `diff --git a/minuses-and-pluses b/minuses-and-pluses +index 6961180..9ba1a00 100644 +--- a/minuses-and-pluses ++++ b/minuses-and-pluses +@@ -1,4 +1,4 @@ +--- 1st line +-++ 2nd line +--- 3rd line +-++ 4th line ++++ 1st line ++-- 2nd line ++++ 3rd line ++-- 4th line +`, + oldFilename: "minuses-and-pluses", + filename: "minuses-and-pluses", + addition: 4, + deletion: 4, + }, + } + + for _, testcase := range tests { + t.Run(testcase.name, func(t *testing.T) { + got, err := ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff), "") + if (err != nil) != testcase.wantErr { + t.Errorf("ParsePatch(%q) error = %v, wantErr %v", testcase.name, err, testcase.wantErr) + return + } + + gotMarshaled, _ := json.MarshalIndent(got, "", " ") + if got.NumFiles != 1 { + t.Errorf("ParsePath(%q) did not receive 1 file:\n%s", testcase.name, string(gotMarshaled)) + return + } + if got.TotalAddition != testcase.addition { + t.Errorf("ParsePath(%q) does not have correct totalAddition %d, wanted %d", testcase.name, got.TotalAddition, testcase.addition) + } + if 
got.TotalDeletion != testcase.deletion { + t.Errorf("ParsePath(%q) did not have correct totalDeletion %d, wanted %d", testcase.name, got.TotalDeletion, testcase.deletion) + } + file := got.Files[0] + if file.Addition != testcase.addition { + t.Errorf("ParsePath(%q) does not have correct file addition %d, wanted %d", testcase.name, file.Addition, testcase.addition) + } + if file.Deletion != testcase.deletion { + t.Errorf("ParsePath(%q) did not have correct file deletion %d, wanted %d", testcase.name, file.Deletion, testcase.deletion) + } + if file.OldName != testcase.oldFilename { + t.Errorf("ParsePath(%q) did not have correct OldName %q, wanted %q", testcase.name, file.OldName, testcase.oldFilename) + } + if file.Name != testcase.filename { + t.Errorf("ParsePath(%q) did not have correct Name %q, wanted %q", testcase.name, file.Name, testcase.filename) + } + }) + } + + // Test max lines + diffBuilder := &strings.Builder{} + + diff := `diff --git a/newfile2 b/newfile2 +new file mode 100644 +index 0000000..6bb8f39 +--- /dev/null ++++ b/newfile2 +@@ -0,0 +1,35 @@ +` + diffBuilder.WriteString(diff) + + for i := 0; i < 35; i++ { + diffBuilder.WriteString("+line" + strconv.Itoa(i) + "\n") + } + diff = diffBuilder.String() + result, err := ParsePatch(db.DefaultContext, 20, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") + if err != nil { + t.Errorf("There should not be an error: %v", err) + } + if !result.Files[0].IsIncomplete { + t.Errorf("Files should be incomplete! %v", result.Files[0]) + } + result, err = ParsePatch(db.DefaultContext, 40, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") + if err != nil { + t.Errorf("There should not be an error: %v", err) + } + if result.Files[0].IsIncomplete { + t.Errorf("Files should not be incomplete! 
%v", result.Files[0]) + } + result, err = ParsePatch(db.DefaultContext, 40, 5, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") + if err != nil { + t.Errorf("There should not be an error: %v", err) + } + if !result.Files[0].IsIncomplete { + t.Errorf("Files should be incomplete! %v", result.Files[0]) + } + + // Test max characters + diff = `diff --git a/newfile2 b/newfile2 +new file mode 100644 +index 0000000..6bb8f39 +--- /dev/null ++++ b/newfile2 +@@ -0,0 +1,35 @@ +` + diffBuilder.Reset() + diffBuilder.WriteString(diff) + + for i := 0; i < 33; i++ { + diffBuilder.WriteString("+line" + strconv.Itoa(i) + "\n") + } + diffBuilder.WriteString("+line33") + for i := 0; i < 512; i++ { + diffBuilder.WriteString("0123456789ABCDEF") + } + diffBuilder.WriteByte('\n') + diffBuilder.WriteString("+line" + strconv.Itoa(34) + "\n") + diffBuilder.WriteString("+line" + strconv.Itoa(35) + "\n") + diff = diffBuilder.String() + + result, err = ParsePatch(db.DefaultContext, 20, 4096, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") + if err != nil { + t.Errorf("There should not be an error: %v", err) + } + if !result.Files[0].IsIncomplete { + t.Errorf("Files should be incomplete! %v", result.Files[0]) + } + result, err = ParsePatch(db.DefaultContext, 40, 4096, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") + if err != nil { + t.Errorf("There should not be an error: %v", err) + } + if !result.Files[0].IsIncomplete { + t.Errorf("Files should be incomplete! 
%v", result.Files[0]) + } + + diff = `diff --git "a/README.md" "b/README.md" +--- a/README.md ++++ b/README.md +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off` + _, err = ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff), "") + if err != nil { + t.Errorf("ParsePatch failed: %s", err) + } + + diff2 := `diff --git "a/A \\ B" "b/A \\ B" +--- "a/A \\ B" ++++ "b/A \\ B" +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off` + _, err = ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff2), "") + if err != nil { + t.Errorf("ParsePatch failed: %s", err) + } + + diff2a := `diff --git "a/A \\ B" b/A/B +--- "a/A \\ B" ++++ b/A/B +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off` + _, err = ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff2a), "") + if err != nil { + t.Errorf("ParsePatch failed: %s", err) + } + + diff3 := `diff --git a/README.md b/README.md +--- a/README.md ++++ b/README.md +@@ -1,3 +1,6 @@ + # gitea-github-migrator ++ ++ Build Status +- Latest Release + Docker Pulls ++ cut off ++ cut off` + _, err = ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(diff3), "") + if err != nil { + t.Errorf("ParsePatch failed: %s", err) + } +} + +func setupDefaultDiff() *Diff { + return &Diff{ + Files: []*DiffFile{ + { + Name: "README.md", + Sections: []*DiffSection{ + { + Lines: []*DiffLine{ + { + LeftIdx: 4, + RightIdx: 4, + }, + }, + }, + }, + }, + }, + } +} + +func 
TestDiff_LoadCommentsNoOutdated(t *testing.T) { + require.NoError(t, unittest.PrepareTestDatabase()) + + issue := unittest.AssertExistsAndLoadBean(t, &issues_model.Issue{ID: 2}) + user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 1}) + diff := setupDefaultDiff() + require.NoError(t, diff.LoadComments(db.DefaultContext, issue, user, false)) + assert.Len(t, diff.Files[0].Sections[0].Lines[0].Conversations, 2) +} + +func TestDiff_LoadCommentsWithOutdated(t *testing.T) { + require.NoError(t, unittest.PrepareTestDatabase()) + + issue := unittest.AssertExistsAndLoadBean(t, &issues_model.Issue{ID: 2}) + user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 1}) + diff := setupDefaultDiff() + require.NoError(t, diff.LoadComments(db.DefaultContext, issue, user, true)) + assert.Len(t, diff.Files[0].Sections[0].Lines[0].Conversations, 2) + assert.Len(t, diff.Files[0].Sections[0].Lines[0].Conversations[0], 2) + assert.Len(t, diff.Files[0].Sections[0].Lines[0].Conversations[1], 1) +} + +func TestDiffLine_CanComment(t *testing.T) { + assert.False(t, (&DiffLine{Type: DiffLineSection}).CanComment()) + assert.False(t, (&DiffLine{Type: DiffLineAdd, Conversations: []issues_model.CodeConversation{{{Content: "bla"}}}}).CanComment()) + assert.True(t, (&DiffLine{Type: DiffLineAdd}).CanComment()) + assert.True(t, (&DiffLine{Type: DiffLineDel}).CanComment()) + assert.True(t, (&DiffLine{Type: DiffLinePlain}).CanComment()) +} + +func TestDiffLine_GetCommentSide(t *testing.T) { + assert.Equal(t, "previous", (&DiffLine{Conversations: []issues_model.CodeConversation{{{Line: -3}}}}).GetCommentSide()) + assert.Equal(t, "proposed", (&DiffLine{Conversations: []issues_model.CodeConversation{{{Line: 3}}}}).GetCommentSide()) +} + +func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) { + gitRepo, err := git.OpenRepository(git.DefaultContext, "./testdata/academic-module") + require.NoError(t, err) + + defer gitRepo.Close() + for _, behavior := range 
[]git.TrustedCmdArgs{{"-w"}, {"--ignore-space-at-eol"}, {"-b"}, nil} { + diffs, err := GetDiff(db.DefaultContext, gitRepo, + &DiffOptions{ + AfterCommitID: "bd7063cc7c04689c4d082183d32a604ed27a24f9", + BeforeCommitID: "559c156f8e0178b71cb44355428f24001b08fc68", + MaxLines: setting.Git.MaxGitDiffLines, + MaxLineCharacters: setting.Git.MaxGitDiffLineCharacters, + MaxFiles: setting.Git.MaxGitDiffFiles, + WhitespaceBehavior: behavior, + }) + require.NoError(t, err, "Error when diff with %s", behavior) + for _, f := range diffs.Files { + assert.NotEmpty(t, f.Sections, "%s should have sections", f.Name) + } + } +} + +func TestNoCrashes(t *testing.T) { + type testcase struct { + gitdiff string + } + + tests := []testcase{ + { + gitdiff: "diff --git \n--- a\t\n", + }, + { + gitdiff: "diff --git \"0\n", + }, + } + for _, testcase := range tests { + // It shouldn't crash, so don't care about the output. + ParsePatch(db.DefaultContext, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff), "") + } +} diff --git a/services/gitdiff/highlightdiff.go b/services/gitdiff/highlightdiff.go new file mode 100644 index 0000000..c72959e --- /dev/null +++ b/services/gitdiff/highlightdiff.go @@ -0,0 +1,227 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. 
// extractHTMLToken locates the first HTML token in s. A token is either a tag
// (from '<' through the next '>') or an entity (from '&' through the next ';'),
// e.g. "<span>", "</span>" or "&lt;".
//
// Results:
//   - token found: before is the text preceding the token, after is the text
//     following it, and valid is true.
//   - s contains neither '<' nor '&': before and token are empty, after is s,
//     and valid is true.
//   - a token is opened but never terminated: before and token are empty,
//     after is s, and valid is false.
func extractHTMLToken(s string) (before, token, after string, valid bool) {
	start := strings.IndexAny(s, "<&")
	if start < 0 {
		return "", "", s, true
	}

	// A tag is closed by '>', an entity by ';'.
	terminator := byte('>')
	if s[start] == '&' {
		terminator = ';'
	}

	length := strings.IndexByte(s[start:], terminator)
	if length < 0 {
		return "", "", s, false
	}

	end := start + length + 1
	return s[:start], s[start:end], s[end:], true
}
type HighlightCodeDiff struct {
	// placeholderBegin is the first rune of the range placeholders are taken from.
	placeholderBegin rune

	// placeholderMaxCount is the number of runes in that range.
	placeholderMaxCount int

	// placeholderIndex is the offset (from placeholderBegin) of the next candidate.
	placeholderIndex int

	// PlaceholderTokenMap maps a placeholder rune to the token it replaces.
	// A rune mapped to the empty string occurs in the code itself and is
	// therefore never handed out as a placeholder.
	PlaceholderTokenMap map[rune]string

	// tokenPlaceholderMap is the reverse lookup: token to placeholder rune.
	tokenPlaceholderMap map[string]rune

	// placeholderOverflowCount counts tokens for which no placeholder was left.
	placeholderOverflowCount int

	// lineWrapperTags holds the line wrapper tags that are removed before diffing.
	lineWrapperTags []string
}

// NewHighlightCodeDiff returns a HighlightCodeDiff with empty token maps whose
// placeholders are drawn from the 64000 runes starting at U+100000.
func NewHighlightCodeDiff() *HighlightCodeDiff {
	hcd := new(HighlightCodeDiff)
	// Plane 16: Supplementary Private Use Area B (U+100000..U+10FFFD)
	hcd.placeholderBegin = rune(0x100000)
	hcd.placeholderMaxCount = 64000
	hcd.PlaceholderTokenMap = make(map[rune]string)
	hcd.tokenPlaceholderMap = make(map[string]rune)
	return hcd
}

// NextPlaceholder returns the next unused placeholder rune, or 0 if no more
// placeholder can be used.
// The diff is done line by line and usually needs only a few placeholders per
// line, so placeholderMaxCount is not expected to be exhausted in real cases.
func (hcd *HighlightCodeDiff) NextPlaceholder() rune {
	for {
		if hcd.placeholderIndex >= hcd.placeholderMaxCount {
			return 0 // no more available placeholder
		}
		candidate := hcd.placeholderBegin + rune(hcd.placeholderIndex)
		hcd.placeholderIndex++
		// only a rune that is not in the map yet (not used by the code, not
		// already a placeholder) may be handed out
		if _, taken := hcd.PlaceholderTokenMap[candidate]; taken {
			continue
		}
		return candidate
	}
}

// isInPlaceholderRange reports whether r lies in the half-open range
// [placeholderBegin, placeholderBegin+placeholderMaxCount).
func (hcd *HighlightCodeDiff) isInPlaceholderRange(r rune) bool {
	if r < hcd.placeholderBegin {
		return false
	}
	return r < hcd.placeholderBegin+rune(hcd.placeholderMaxCount)
}
+ hcd.PlaceholderTokenMap[r] = "" + } + } +} + +func (hcd *HighlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff { + hcd.CollectUsedRunes(codeA) + hcd.CollectUsedRunes(codeB) + + highlightCodeA, _ := highlight.Code(filename, language, codeA) + highlightCodeB, _ := highlight.Code(filename, language, codeB) + + convertedCodeA := hcd.ConvertToPlaceholders(string(highlightCodeA)) + convertedCodeB := hcd.ConvertToPlaceholders(string(highlightCodeB)) + + diffs := diffMatchPatch.DiffMain(convertedCodeA, convertedCodeB, true) + diffs = diffMatchPatch.DiffCleanupSemantic(diffs) + diffs = diffMatchPatch.DiffCleanupEfficiency(diffs) + + for i := range diffs { + hcd.recoverOneDiff(&diffs[i]) + } + return diffs +} + +// convertToPlaceholders totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes. +func (hcd *HighlightCodeDiff) ConvertToPlaceholders(htmlCode string) string { + var tagStack []string + res := strings.Builder{} + + firstRunForLineTags := hcd.lineWrapperTags == nil + + var beforeToken, token string + var valid bool + + // the standard chroma highlight HTML is " ... 
" + for { + beforeToken, token, htmlCode, valid = extractHTMLToken(htmlCode) + if !valid || token == "" { + break + } + // write the content before the token into result string, and consume the token in the string + res.WriteString(beforeToken) + + // the line wrapper tags should be removed before diff + if strings.HasPrefix(token, `") + continue + } + + var tokenInMap string + if strings.HasSuffix(token, "" for "" + tokenInMap = token + "" + tagStack = tagStack[:len(tagStack)-1] + } else if token[0] == '<' { // for opening tag + tokenInMap = token + tagStack = append(tagStack, token) + } else if token[0] == '&' { // for html entity + tokenInMap = token + } // else: impossible + + // remember the placeholder and token in the map + placeholder, ok := hcd.tokenPlaceholderMap[tokenInMap] + if !ok { + placeholder = hcd.NextPlaceholder() + if placeholder != 0 { + hcd.tokenPlaceholderMap[tokenInMap] = placeholder + hcd.PlaceholderTokenMap[placeholder] = tokenInMap + } + } + + if placeholder != 0 { + res.WriteRune(placeholder) // use the placeholder to replace the token + } else { + // unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting + // usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma. + hcd.placeholderOverflowCount++ + if strings.HasPrefix(token, "&") { + // when the token is a html entity, something must be outputted even if there is no placeholder. + res.WriteRune(0xFFFD) // replacement character TODO: how to handle this case more gracefully? + res.WriteString(token[1:]) // still output the entity code part, otherwise there will be no diff result. 
+ } + } + } + + // write the remaining string + res.WriteString(htmlCode) + return res.String() +} + +func (hcd *HighlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { + diff.Text = hcd.Recover(diff.Text) +} + +func (hcd *HighlightCodeDiff) Recover(src string) string { + sb := strings.Builder{} + var tagStack []string + + for _, r := range src { + token, ok := hcd.PlaceholderTokenMap[r] + if !ok || token == "" { + sb.WriteRune(r) // if the rune is not a placeholder, write it as it is + continue + } + var tokenToRecover string + if strings.HasPrefix(token, "')+1] + if len(tagStack) == 0 { + continue // if no opening tag in stack yet, skip the closing tag + } + tagStack = tagStack[:len(tagStack)-1] + } else if token[0] == '<' { // for opening tag + tokenToRecover = token + tagStack = append(tagStack, token) + } else if token[0] == '&' { // for html entity + tokenToRecover = token + } // else: impossible + sb.WriteString(tokenToRecover) + } + + if len(tagStack) > 0 { + // close all opening tags + for i := len(tagStack) - 1; i >= 0; i-- { + tagToClose := tagStack[i] + // get the closing tag "" from "" or "" + pos := strings.IndexAny(tagToClose, " >") + if pos != -1 { + sb.WriteString("") + } // else: impossible. every tag was pushed into the stack by the code above and is valid HTML opening tag + } + } + + return sb.String() +} diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go new file mode 100644 index 0000000..2ff4472 --- /dev/null +++ b/services/gitdiff/highlightdiff_test.go @@ -0,0 +1,125 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package gitdiff + +import ( + "fmt" + "strings" + "testing" + + "github.com/sergi/go-diff/diffmatchpatch" + "github.com/stretchr/testify/assert" +) + +func TestDiffWithHighlight(t *testing.T) { + hcd := NewHighlightCodeDiff() + diffs := hcd.diffWithHighlight( + "main.v", "", + " run('<>')\n", + " run(db)\n", + ) + + expected := ` run('<>')` + output := diffToHTML(nil, diffs, DiffLineDel) + assert.Equal(t, expected, output) + + expected = ` run(db)` + output = diffToHTML(nil, diffs, DiffLineAdd) + assert.Equal(t, expected, output) + + hcd = NewHighlightCodeDiff() + hcd.PlaceholderTokenMap['O'] = "" + hcd.PlaceholderTokenMap['C'] = "" + diff := diffmatchpatch.Diff{} + + diff.Text = "OC" + hcd.recoverOneDiff(&diff) + assert.Equal(t, "", diff.Text) + + diff.Text = "O" + hcd.recoverOneDiff(&diff) + assert.Equal(t, "", diff.Text) + + diff.Text = "C" + hcd.recoverOneDiff(&diff) + assert.Equal(t, "", diff.Text) +} + +func TestDiffWithHighlightPlaceholder(t *testing.T) { + hcd := NewHighlightCodeDiff() + diffs := hcd.diffWithHighlight( + "main.js", "", + "a='\U00100000'", + "a='\U0010FFFD''", + ) + assert.Equal(t, "", hcd.PlaceholderTokenMap[0x00100000]) + assert.Equal(t, "", hcd.PlaceholderTokenMap[0x0010FFFD]) + + expected := fmt.Sprintf(`a='%s'`, "\U00100000") + output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel) + assert.Equal(t, expected, output) + + hcd = NewHighlightCodeDiff() + diffs = hcd.diffWithHighlight( + "main.js", "", + "a='\U00100000'", + "a='\U0010FFFD'", + ) + expected = fmt.Sprintf(`a='%s'`, "\U0010FFFD") + output = diffToHTML(nil, diffs, DiffLineAdd) + assert.Equal(t, expected, output) +} + +func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) { + hcd := NewHighlightCodeDiff() + hcd.placeholderMaxCount = 0 + diffs := hcd.diffWithHighlight( + "main.js", "", + "'", + ``, + ) + output := diffToHTML(nil, diffs, DiffLineDel) + expected := fmt.Sprintf(`%s#39;`, "\uFFFD") + assert.Equal(t, expected, output) 
+ + hcd = NewHighlightCodeDiff() + hcd.placeholderMaxCount = 0 + diffs = hcd.diffWithHighlight( + "main.js", "", + "a < b", + "a > b", + ) + output = diffToHTML(nil, diffs, DiffLineDel) + expected = fmt.Sprintf(`a %slt; b`, "\uFFFD") + assert.Equal(t, expected, output) + + output = diffToHTML(nil, diffs, DiffLineAdd) + expected = fmt.Sprintf(`a %sgt; b`, "\uFFFD") + assert.Equal(t, expected, output) +} + +func TestDiffWithHighlightTagMatch(t *testing.T) { + totalOverflow := 0 + for i := 0; i < 100; i++ { + hcd := NewHighlightCodeDiff() + hcd.placeholderMaxCount = i + diffs := hcd.diffWithHighlight( + "main.js", "", + "a='1'", + "b='2'", + ) + totalOverflow += hcd.placeholderOverflowCount + + output := diffToHTML(nil, diffs, DiffLineDel) + c1 := strings.Count(output, " 1574829684 +0800 clone: from https://try.gitea.io/shemgp-aiias/academic-module diff --git a/services/gitdiff/testdata/academic-module/logs/refs/heads/master b/services/gitdiff/testdata/academic-module/logs/refs/heads/master new file mode 100644 index 0000000..16b2e1c --- /dev/null +++ b/services/gitdiff/testdata/academic-module/logs/refs/heads/master @@ -0,0 +1 @@ +0000000000000000000000000000000000000000 bd7063cc7c04689c4d082183d32a604ed27a24f9 Lunny Xiao 1574829684 +0800 clone: from https://try.gitea.io/shemgp-aiias/academic-module diff --git a/services/gitdiff/testdata/academic-module/logs/refs/remotes/origin/HEAD b/services/gitdiff/testdata/academic-module/logs/refs/remotes/origin/HEAD new file mode 100644 index 0000000..16b2e1c --- /dev/null +++ b/services/gitdiff/testdata/academic-module/logs/refs/remotes/origin/HEAD @@ -0,0 +1 @@ +0000000000000000000000000000000000000000 bd7063cc7c04689c4d082183d32a604ed27a24f9 Lunny Xiao 1574829684 +0800 clone: from https://try.gitea.io/shemgp-aiias/academic-module diff --git a/services/gitdiff/testdata/academic-module/objects/pack/pack-597efbc3613c7ba790e33b178fd9fc1fe17b4245.idx 
b/services/gitdiff/testdata/academic-module/objects/pack/pack-597efbc3613c7ba790e33b178fd9fc1fe17b4245.idx new file mode 100644 index 0000000..4d759aa Binary files /dev/null and b/services/gitdiff/testdata/academic-module/objects/pack/pack-597efbc3613c7ba790e33b178fd9fc1fe17b4245.idx differ diff --git a/services/gitdiff/testdata/academic-module/objects/pack/pack-597efbc3613c7ba790e33b178fd9fc1fe17b4245.pack b/services/gitdiff/testdata/academic-module/objects/pack/pack-597efbc3613c7ba790e33b178fd9fc1fe17b4245.pack new file mode 100644 index 0000000..2dc49cf Binary files /dev/null and b/services/gitdiff/testdata/academic-module/objects/pack/pack-597efbc3613c7ba790e33b178fd9fc1fe17b4245.pack differ diff --git a/services/gitdiff/testdata/academic-module/packed-refs b/services/gitdiff/testdata/academic-module/packed-refs new file mode 100644 index 0000000..13b5611 --- /dev/null +++ b/services/gitdiff/testdata/academic-module/packed-refs @@ -0,0 +1,2 @@ +# pack-refs with: peeled fully-peeled sorted +bd7063cc7c04689c4d082183d32a604ed27a24f9 refs/remotes/origin/master diff --git a/services/gitdiff/testdata/academic-module/refs/heads/master b/services/gitdiff/testdata/academic-module/refs/heads/master new file mode 100644 index 0000000..bd2b56e --- /dev/null +++ b/services/gitdiff/testdata/academic-module/refs/heads/master @@ -0,0 +1 @@ +bd7063cc7c04689c4d082183d32a604ed27a24f9 diff --git a/services/gitdiff/testdata/academic-module/refs/remotes/origin/HEAD b/services/gitdiff/testdata/academic-module/refs/remotes/origin/HEAD new file mode 100644 index 0000000..6efe28f --- /dev/null +++ b/services/gitdiff/testdata/academic-module/refs/remotes/origin/HEAD @@ -0,0 +1 @@ +ref: refs/remotes/origin/master -- cgit v1.2.3