# b4/diff.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright (C) 2020 by the Linux Foundation
#
__author__ = 'Konstantin Ryabitsev <konstantin@linuxfoundation.org>'

import os
import sys
import b4
import b4.mbox
import mailbox
import shutil
import urllib.parse

from tempfile import mkstemp


# Reuse the logger object exposed by the b4 package instead of creating a new one
logger = b4.logger


def make_fake_commit_range(gitdir, lser):
    """Create (or reuse) a commit range that reproduces series ``lser`` in ``gitdir``.

    A throw-away worktree is populated with just the blobs the patches say
    they modify, committed as an "Initial fake commit", and the series is
    then applied on top with ``git am``. The resulting pair of commits is
    remembered in ``<cachedir>/<quoted msgid>.fakeam`` (one line:
    ``start end``) so that later runs can skip the work as long as both
    objects still exist in the repository.

    :param gitdir: repository passed to ``b4.git_run_command`` for object
        lookups and used as the base for the temporary worktree
    :param lser: series object; this function reads ``patches``,
        ``revision`` and ``subject`` from it. ``patches[0]`` is skipped when
        applying (presumably the cover letter slot -- confirm against the
        series class) and entries may be ``None`` for missing patches.
    :return: ``(start_commit, end_commit)`` on success, ``(None, None)`` when
        the series is empty or incomplete, or when any git step fails
    """
    start_commit = end_commit = None
    # Do we have it in cache already?
    cachedir = b4.get_cache_dir()
    # Use the msgid of the first non-None patch in the series
    msgid = None
    for lmsg in lser.patches:
        if lmsg is not None:
            msgid = lmsg.msgid
            break
    if msgid is None:
        logger.critical('Cannot operate on an empty series')
        return None, None
    cachefile = os.path.join(cachedir, '%s.fakeam' % urllib.parse.quote_plus(msgid))
    if os.path.exists(cachefile):
        stalecache = False
        with open(cachefile, 'r') as fh:
            cachedata = fh.read()
            chunks = cachedata.strip().split()
            if len(chunks) == 2:
                start_commit, end_commit = chunks
            else:
                # Anything other than exactly two tokens is not ours to trust
                stalecache = True
        if start_commit is not None and end_commit is not None:
            # Make sure they are still there
            ecode, out = b4.git_run_command(gitdir, ['cat-file', '-e', start_commit])
            if ecode > 0:
                stalecache = True
            else:
                ecode, out = b4.git_run_command(gitdir, ['cat-file', '-e', end_commit])
                if ecode > 0:
                    stalecache = True
                else:
                    logger.debug('Using previously generated range')
                    return start_commit, end_commit

        if stalecache:
            logger.debug('Stale cache for [v%s] %s', lser.revision, lser.subject)
            os.unlink(cachefile)

    logger.info('Preparing fake-am for v%s: %s', lser.revision, lser.subject)
    with b4.git_temp_worktree(gitdir):
        # We are in a temporary chdir at this time, so writing to a known file should be safe
        mbxf = '.__git-am__'
        mbx = mailbox.mbox(mbxf)
        # The try/finally guarantees the mbox handle is released on the early
        # error returns below, not only on the success path.
        try:
            # Logic largely borrowed from gj_tools
            seenfiles = set()
            for lmsg in lser.patches[1:]:
                if lmsg is None:
                    # A hole in the series cannot be applied; bail out cleanly
                    # instead of failing on attribute access below.
                    logger.critical('ERROR: v%s series is incomplete, unable to prepare fake-am', lser.revision)
                    return None, None
                logger.debug('Looking at %s', lmsg.full_subject)
                lmsg.load_hashes()
                for fn, fi in lmsg.blob_indexes:
                    if fn in seenfiles:
                        # We already processed this file, so this blob won't match
                        continue
                    seenfiles.add(fn)
                    if set(fi) == {'0'}:
                        # New file creation (all-zero index), nothing to do here
                        logger.debug('  New file: %s', fn)
                        continue
                    # Try to grab full ref_id of this hash
                    ecode, out = b4.git_run_command(gitdir, ['rev-parse', fi])
                    if ecode > 0:
                        logger.critical('  ERROR: Could not find matching blob for %s (%s)', fn, fi)
                        # TODO: better handling
                        return None, None
                    logger.debug('  Found matching blob for: %s', fn)
                    fullref = out.strip()
                    # Stage the pre-image blob in the worktree index without checking it out
                    gitargs = ['update-index', '--add', '--cacheinfo', f'0644,{fullref},{fn}']
                    ecode, out = b4.git_run_command(None, gitargs)
                    if ecode > 0:
                        logger.critical('  ERROR: Could not run update-index for %s (%s)', fn, fullref)
                        return None, None
                mbx.add(lmsg.msg.as_string(policy=b4.emlpolicy).encode('utf-8'))
        finally:
            # close() also flushes the queued messages to mbxf for "git am"
            mbx.close()

        ecode, out = b4.git_run_command(None, ['write-tree'])
        if ecode > 0:
            logger.critical('ERROR: Could not write fake-am tree')
            return None, None
        treeid = out.strip()
        # At this point we have a worktree with files that should cleanly receive a git am
        gitargs = ['commit-tree', treeid + '^{tree}', '-F', '-']
        ecode, out = b4.git_run_command(None, gitargs, stdin='Initial fake commit'.encode('utf-8'))
        if ecode > 0:
            logger.critical('ERROR: Could not commit-tree')
            return None, None
        start_commit = out.strip()
        b4.git_run_command(None, ['reset', '--hard', start_commit])
        ecode, out = b4.git_run_command(None, ['am', mbxf])
        if ecode > 0:
            logger.critical('ERROR: Could not fake-am version %s', lser.revision)
            return None, None
        ecode, out = b4.git_run_command(None, ['rev-parse', 'HEAD'])
        if ecode > 0:
            # Do not cache or return a bogus end commit
            logger.critical('ERROR: Could not find the tip of the fake-am range')
            return None, None
        end_commit = out.strip()
        logger.info('  range: %.12s..%.12s', start_commit, end_commit)

    with open(cachefile, 'w') as fh:
        logger.debug('Saving into cache: %s', cachefile)
        logger.debug('    %s..%s', start_commit, end_commit)
        fh.write(f'{start_commit} {end_commit}\n')

    return start_commit, end_commit


def _load_thread(cmdargs, msgid):
    """Fetch the thread for ``msgid`` (or reuse the cached lookup) and load it.

    The thread is staged in a temporary mbox file which is always removed
    before returning; the messages handed to ``b4.LoreMailbox`` have already
    been read into memory by ``mailbox.mbox.items()``.

    :param cmdargs: parsed command-line arguments; reads ``wantvers``,
        ``nocache`` and ``useproject``
    :param msgid: message-id identifying the thread
    :return: a populated ``b4.LoreMailbox``, or ``None`` if the thread could
        not be retrieved
    """
    # mkstemp() returns an already-open descriptor alongside the path; we only
    # need the path, so close the descriptor right away instead of leaking it.
    # NOTE: the positional argument is mkstemp's *suffix*, kept as-is.
    fd, savefile = mkstemp('b4-diff-to')
    os.close(fd)
    tempfiles = [savefile]
    try:
        # Do we have a cache of this lookup?
        cachedir = b4.get_cache_dir()
        if cmdargs.wantvers:
            cachefile = os.path.join(cachedir, '%s-%s.diff.mbx' % (urllib.parse.quote_plus(msgid),
                                                                   '-'.join([str(x) for x in cmdargs.wantvers])))
        else:
            cachefile = os.path.join(cachedir, '%s-latest.diff.mbx' % urllib.parse.quote_plus(msgid))
        if os.path.exists(cachefile) and not cmdargs.nocache:
            logger.info('Using cached copy of the lookup')
            shutil.copyfile(cachefile, savefile)
            mboxfile = savefile
        else:
            mboxfile = b4.get_pi_thread_by_msgid(msgid, savefile, useproject=cmdargs.useproject,
                                                 nocache=cmdargs.nocache)
            if mboxfile is None:
                logger.critical('Unable to retrieve thread: %s', msgid)
                return None
            if mboxfile != savefile:
                tempfiles.append(mboxfile)
            # mboxfile is a path, so count the messages in the mailbox itself
            # rather than taking the length of the path string.
            fetched = mailbox.mbox(mboxfile)
            try:
                logger.info('Retrieved %s messages in the thread', len(fetched))
            finally:
                fetched.close()
            b4.mbox.get_extra_series(mboxfile, direction=-1, wantvers=cmdargs.wantvers)

        shutil.copyfile(mboxfile, cachefile)
        mbx = mailbox.mbox(mboxfile)
        try:
            count = len(mbx)
            logger.info('---')
            logger.info('Analyzing %s messages in the thread', count)
            lmbx = b4.LoreMailbox()
            for key, msg in mbx.items():
                lmbx.add_message(msg)
        finally:
            mbx.close()
        return lmbx
    finally:
        # Runs on success, on failure and on exceptions alike
        for path in tempfiles:
            try:
                os.unlink(path)
            except FileNotFoundError:
                pass


def main(cmdargs):
    """Entry point for the diff command: range-diff two revisions of a series.

    Loads the thread identified by the message-id in ``cmdargs``, picks the
    lower and upper revisions to compare, builds a fake-am commit range for
    each with ``make_fake_commit_range`` and runs ``git range-diff`` on them.

    Revision selection:
      * two values in ``cmdargs.wantvers``: compare exactly those;
      * one value: compare it against the highest revision found;
      * none: compare the lowest and highest revisions found.

    :param cmdargs: parsed command-line arguments; reads ``wantvers``,
        ``nocache``, ``useproject``, ``gitdir``, ``nodiff``, ``outdiff`` and
        ``color``
    :return: ``None``. Exits with status 1 on most failures and status 0
        after printing the command when ``nodiff`` is set; returns normally
        if the thread could not be retrieved.
    """
    msgid = b4.get_msgid(cmdargs)
    if cmdargs.wantvers and len(cmdargs.wantvers) > 2:
        logger.critical('Can only compare two versions at a time')
        sys.exit(1)

    # start by grabbing the mbox provided
    lmbx = _load_thread(cmdargs, msgid)
    if lmbx is None:
        return
    if not lmbx.series:
        # max()/min() below would raise ValueError on an empty mapping
        logger.critical('No patch series found in the thread')
        sys.exit(1)

    if cmdargs.wantvers and len(cmdargs.wantvers) == 1:
        upper = max(lmbx.series.keys())
        lower = cmdargs.wantvers[0]
    elif cmdargs.wantvers and len(cmdargs.wantvers) == 2:
        upper = max(cmdargs.wantvers)
        lower = min(cmdargs.wantvers)
    else:
        upper = max(lmbx.series.keys())
        lower = min(lmbx.series.keys())

    if upper == lower:
        logger.critical('Could not find previous revision')
        sys.exit(1)

    if upper not in lmbx.series:
        logger.critical('Could not find revision %s', upper)
        sys.exit(1)
    if lower not in lmbx.series:
        logger.critical('Could not find revision %s', lower)
        sys.exit(1)

    # Prepare the lower fake-am range
    lsc, lec = make_fake_commit_range(cmdargs.gitdir, lmbx.series[lower])
    if lsc is None or lec is None:
        logger.critical('---')
        logger.critical('Could not create fake-am range for lower series v%s', lower)
        sys.exit(1)
    # Prepare the upper fake-am range
    usc, uec = make_fake_commit_range(cmdargs.gitdir, lmbx.series[upper])
    if usc is None or uec is None:
        logger.critical('---')
        logger.critical('Could not create fake-am range for upper series v%s', upper)
        sys.exit(1)
    logger.info('---')
    # Abbreviated form shown to the user; the real invocation uses full hashes
    grdcmd = 'git range-diff %.12s..%.12s %.12s..%.12s' % (lsc, lec, usc, uec)
    if cmdargs.nodiff:
        logger.info('Success, to compare v%s and v%s:', lower, upper)
        logger.info('    %s', grdcmd)
        sys.exit(0)
    logger.info('Diffing v%s and v%s', lower, upper)
    logger.info('    Running: %s', grdcmd)
    gitargs = ['range-diff', f'{lsc}..{lec}', f'{usc}..{uec}']
    # Colour by default when writing to the terminal; only on request for files
    if cmdargs.outdiff is None or cmdargs.color:
        gitargs.append('--color')
    ecode, rdiff = b4.git_run_command(cmdargs.gitdir, gitargs)
    if ecode > 0:
        logger.critical('Unable to generate diff')
        logger.critical('Try running it yourself:')
        logger.critical('    %s', grdcmd)
        sys.exit(1)
    if cmdargs.outdiff is not None:
        logger.info('Writing %s', cmdargs.outdiff)
        # Context manager so the output file is flushed and closed
        with open(cmdargs.outdiff, 'w') as fh:
            fh.write(rdiff)
    else:
        logger.info('---')
        sys.stdout.write(rdiff)