From 452f4fa51e1fd1d0e3155fb4f4d8913298ac63a6 Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Thu, 7 Jul 2011 12:23:55 +0800 Subject: xdiff/xprepare: use memset() Use memset() instead of a for loop to initialize. This could give a performance advantage. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'xdiff/xprepare.c') diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 1689085235..783631a34c 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -64,8 +64,6 @@ static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2); static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { - long i; - cf->flags = flags; cf->hbits = xdl_hashbits((unsigned int) size); @@ -80,8 +78,7 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { xdl_cha_free(&cf->ncha); return -1; } - for (i = 0; i < cf->hsize; i++) - cf->rchash[i] = NULL; + memset(cf->rchash, 0, cf->hsize * sizeof(xdlclass_t *)); cf->count = 0; @@ -136,7 +133,7 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) { unsigned int hbits; - long i, nrec, hsize, bsize; + long nrec, hsize, bsize; unsigned long hav; char const *blk, *cur, *top, *prev; xrecord_t *crec; @@ -164,8 +161,7 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, xdl_cha_free(&xdf->rcha); return -1; } - for (i = 0; i < hsize; i++) - rhash[i] = NULL; + memset(rhash, 0, hsize * sizeof(xrecord_t *)); nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { -- cgit v1.2.3 From 159607a8f1ad16c0a04e0f17d5f8a35299696bc6 Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Thu, 7 Jul 2011 12:23:56 +0800 Subject: xdiff/xprepare: refactor abort cleanups Group free()'s that are called when a malloc() fails in xdl_prepare_ctx(), making for more readable code. Also add a free() on ha, in case future git hackers add allocs after the ha malloc. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 91 ++++++++++++++++++++------------------------------------ 1 file changed, 32 insertions(+), 59 deletions(-) (limited to 'xdiff/xprepare.c') diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 783631a34c..0f571db2d7 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -143,24 +143,21 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, char *rchg; long *rindex; - if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) { + ha = NULL; + rindex = NULL; + rchg = NULL; + rhash = NULL; + recs = NULL; - return -1; - } - if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) { - - xdl_cha_free(&xdf->rcha); - return -1; - } + if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) + goto abort; + if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) + goto abort; hbits = xdl_hashbits((unsigned int) narec); hsize = 1 << hbits; - if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) { - - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) + goto abort; memset(rhash, 0, hsize * sizeof(xrecord_t *)); nrec = 0; @@ -175,63 +172,30 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, hav = xdl_hash_record(&cur, top, xpp->flags); if (nrec >= narec) { narec *= 2; - if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) + goto abort; recs = rrecs; } - if (!(crec = xdl_cha_alloc(&xdf->rcha))) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(crec = xdl_cha_alloc(&xdf->rcha))) + goto abort; crec->ptr = prev; crec->size = (long) (cur - prev); crec->ha = hav; recs[nrec++] = crec; - if (xdl_classify_record(cf, rhash, hbits, crec) < 0) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (xdl_classify_record(cf, rhash, hbits, crec) < 0) + goto abort; } } - if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) + goto abort; memset(rchg, 0, (nrec + 2) * sizeof(char)); - if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) { - - xdl_free(rchg); - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } - if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) { - - xdl_free(rindex); - xdl_free(rchg); - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) + goto abort; + if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) + goto abort; xdf->nrec = nrec; xdf->recs = recs; @@ -245,6 +209,15 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, xdf->dend = nrec - 1; return 0; + +abort: + xdl_free(ha); + xdl_free(rindex); + xdl_free(rchg); + xdl_free(rhash); + xdl_free(recs); + xdl_cha_free(&xdf->rcha); + return -1; } -- cgit v1.2.3 From 9f37c275938e1fbca7165872dad039874add09cd Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Tue, 12 Jul 2011 14:10:26 +0800 Subject: xdiff/xprepare: skip classification xdiff performs "classification" of records (xdl_classify_record()), replacing hashes (xrecord_t.ha) with a unique identifier of the record/line and building a hash table (xrecord_t.rhash) of records. This is then used to "cleanup" records (xdl_cleanup_records()). We don't need any of that in histogram diff, so we omit calls to these functions. We also skip allocating memory to the hash table, rhash, as it is no longer used. This gives us a small boost in performance. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'xdiff/xprepare.c') diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 0f571db2d7..7556538df5 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -154,11 +154,15 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) goto abort; - hbits = xdl_hashbits((unsigned int) narec); - hsize = 1 << hbits; - if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) - goto abort; - memset(rhash, 0, hsize * sizeof(xrecord_t *)); + if (xpp->flags & XDF_HISTOGRAM_DIFF) + hbits = hsize = 0; + else { + hbits = xdl_hashbits((unsigned int) narec); + hsize = 1 << hbits; + if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) + goto abort; + memset(rhash, 0, hsize * sizeof(xrecord_t *)); + } nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { @@ -183,7 +187,8 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, crec->ha = hav; recs[nrec++] = crec; - if (xdl_classify_record(cf, rhash, hbits, crec) < 0) + if (!(xpp->flags & XDF_HISTOGRAM_DIFF) && + xdl_classify_record(cf, rhash, hbits, crec) < 0) goto abort; } } @@ -240,7 +245,8 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, enl1 = xdl_guess_lines(mf1) + 1; enl2 = xdl_guess_lines(mf2) + 1; - if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) { + if (!(xpp->flags & XDF_HISTOGRAM_DIFF) && + xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) { return -1; } @@ -257,9 +263,11 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, return -1; } - xdl_free_classifier(&cf); + if (!(xpp->flags & XDF_HISTOGRAM_DIFF)) + xdl_free_classifier(&cf); if (!(xpp->flags & XDF_PATIENCE_DIFF) && + !(xpp->flags & XDF_HISTOGRAM_DIFF) && xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) { xdl_free_ctx(&xe->xdf2); -- cgit v1.2.3 From 86abba801575892a8a2b161aa29518c1ed05e1f1 Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Tue, 12 Jul 2011 14:10:27 +0800 Subject: xdiff/xprepare: use a smaller sample size for histogram diff For histogram diff, we can afford a smaller sample size and thus a poorer estimate of the number of lines, as the hash table (rhash) won't be filled up/grown. This is safe as the final count of lines (xdf.nrecs) will be updated correctly anyway by xdl_prepare_ctx(). This gives us a small boost in performance. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 17 ++++++++++++++--- xdiff/xutils.c | 8 ++------ xdiff/xutils.h | 2 +- 3 files changed, 17 insertions(+), 10 deletions(-) (limited to 'xdiff/xprepare.c') diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 7556538df5..dfbb0de987 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -26,6 +26,8 @@ #define XDL_KPDIS_RUN 4 #define XDL_MAX_EQLIMIT 1024 #define XDL_SIMSCAN_WINDOW 100 +#define XDL_GUESS_NLINES1 256 +#define XDL_GUESS_NLINES2 20 typedef struct s_xdlclass { @@ -239,11 +241,20 @@ static void xdl_free_ctx(xdfile_t *xdf) { int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdfenv_t *xe) { - long enl1, enl2; + long enl1, enl2, sample; xdlclassifier_t cf; - enl1 = xdl_guess_lines(mf1) + 1; - enl2 = xdl_guess_lines(mf2) + 1; + /* + * For histogram diff, we can afford a smaller sample size and + * thus a poorer estimate of the number of lines, as the hash + * table (rhash) won't be filled up/grown. The number of lines + * (nrecs) will be updated correctly anyway by + * xdl_prepare_ctx(). + */ + sample = xpp->flags & XDF_HISTOGRAM_DIFF ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1; + + enl1 = xdl_guess_lines(mf1, sample) + 1; + enl2 = xdl_guess_lines(mf2, sample) + 1; if (!(xpp->flags & XDF_HISTOGRAM_DIFF) && xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) { diff --git a/xdiff/xutils.c b/xdiff/xutils.c index ded7c327dc..a45e89bbed 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -24,10 +24,6 @@ -#define XDL_GUESS_NLINES 256 - - - long xdl_bogosqrt(long n) { long i; @@ -159,12 +155,12 @@ void *xdl_cha_next(chastore_t *cha) { } -long xdl_guess_lines(mmfile_t *mf) { +long xdl_guess_lines(mmfile_t *mf, long sample) { long nl = 0, size, tsize = 0; char const *data, *cur, *top; if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) { - for (top = data + size; nl < XDL_GUESS_NLINES;) { + for (top = data + size; nl < sample;) { if (cur >= top) { tsize += (long) (cur - data); if (!(cur = data = xdl_mmfile_next(mf, &size))) diff --git a/xdiff/xutils.h b/xdiff/xutils.h index 674a657b08..714719a89c 100644 --- a/xdiff/xutils.h +++ b/xdiff/xutils.h @@ -33,7 +33,7 @@ void xdl_cha_free(chastore_t *cha); void *xdl_cha_alloc(chastore_t *cha); void *xdl_cha_first(chastore_t *cha); void *xdl_cha_next(chastore_t *cha); -long xdl_guess_lines(mmfile_t *mf); +long xdl_guess_lines(mmfile_t *mf, long sample); int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags); unsigned long xdl_hash_record(char const **data, char const *top, long flags); unsigned int xdl_hashbits(unsigned int size); -- cgit v1.2.3 From 739864b1ffb379120df9cfa4111c4ec20b823cfd Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Mon, 1 Aug 2011 12:20:07 +0800 Subject: xdiff: do away with xdl_mmfile_next() Given our simple mmfile structure, xdl_mmfile_next() calls are redundant. Do away with calls to them. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xdiff.h | 1 - xdiff/xprepare.c | 7 +------ xdiff/xutils.c | 14 +------------- 3 files changed, 2 insertions(+), 20 deletions(-) (limited to 'xdiff/xprepare.c') diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index c26170ce5e..4beb10c678 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -106,7 +106,6 @@ typedef struct s_bdiffparam { #define xdl_realloc(ptr,x) realloc(ptr,x) void *xdl_mmfile_first(mmfile_t *mmf, long *size); -void *xdl_mmfile_next(mmfile_t *mmf, long *size); long xdl_mmfile_size(mmfile_t *mmf); int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index dfbb0de987..620fc9a657 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -168,12 +168,7 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { - for (top = blk + bsize;;) { - if (cur >= top) { - if (!(cur = blk = xdl_mmfile_next(mf, &bsize))) - break; - top = blk + bsize; - } + for (top = blk + bsize; cur < top; ) { prev = cur; hav = xdl_hash_record(&cur, top, xpp->flags); if (nrec >= narec) { diff --git a/xdiff/xutils.c b/xdiff/xutils.c index a45e89bbed..0de084e53f 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -67,12 +67,6 @@ void *xdl_mmfile_first(mmfile_t *mmf, long *size) } -void *xdl_mmfile_next(mmfile_t *mmf, long *size) -{ - return NULL; -} - - long xdl_mmfile_size(mmfile_t *mmf) { return mmf->size; @@ -160,13 +154,7 @@ long xdl_guess_lines(mmfile_t *mf, long sample) { char const *data, *cur, *top; if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) { - for (top = data + size; nl < sample;) { - if (cur >= top) { - tsize += (long) (cur - data); - if (!(cur = data = xdl_mmfile_next(mf, &size))) - break; - top = data + size; - } + for (top = data + size; nl < sample && cur < top; ) { nl++; if (!(cur = memchr(cur, '\n', top - cur))) cur = top; -- cgit v1.2.3