tiger-compiler/tcsh/python/tests/ipynbtest.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279

#!/usr/bin/env python3
"""
simple example script for running and testing notebooks.

Usage: `ipynbtest.py foo.ipynb [bar.ipynb [...]]`

Each cell is submitted to the kernel, and the outputs are compared
with those stored in the notebook.
"""

# Derived from https://gist.github.com/minrk/2620735
# Derived from https://gitlab.lrde.epita.fr/spot/spot/-/raw/next/tests/python/ipnbdoctest.py

from __future__ import print_function

import os
import sys
import time
import base64
import re
import pprint
import random
from difflib import unified_diff as diff

from collections import defaultdict
try:
    from queue import Empty
except ImportError:
    print('Python 3.x is needed to run this script.')
    sys.exit(77)

import importlib.util
# Skip the test (exit status 77) when IPython is not installed.
# importlib.util.find_spec() returns None -- it does not raise -- when a
# top-level module cannot be found, so the returned spec must be checked.
try:
    ipython_spec = importlib.util.find_spec('IPython')
except (ImportError, ValueError):
    # find_spec() may still raise for a broken or half-initialised module;
    # treat that like a missing installation.
    ipython_spec = None
if ipython_spec is None:
    print('IPython is needed to run this script.')
    sys.exit(77)

try:
    from jupyter_client import KernelManager
except ImportError:
    try:
        from IPython.kernel import KernelManager
    except ImportError:
        try:
            from IPython.zmq.blockingkernelmanager \
                import BlockingKernelManager as KernelManager
        except:
            print('IPython is needed to run this script.')
            sys.exit(77)

# Notebooks are read with the v4 notebook format; fall back to the copy
# bundled with IPython when the standalone nbformat package is missing.
try:
    from nbformat import v4 as nbformat
except ImportError:
    from IPython.nbformat import v4 as nbformat


def canonicalize(s):
    """Normalize a string so that irrelevant differences do not matter.

    Non-string values are returned untouched.  For strings:

    - Windows line endings become plain newlines;
    - trailing newlines are dropped (trailing spaces are kept);
    - lowercase hexadecimal addresses of the form ``at 0x...`` are replaced
      by a fixed placeholder address;
    - bracketed lists made only of backend names (lir, hir, llvm, mips,
      ia32) are collapsed to ``[]``.
    """
    if isinstance(s, str):
        text = s.replace('\r\n', '\n').rstrip('\n')
        # (pattern, replacement) pairs, applied in this order.
        substitutions = (
            (r'at 0x[a-f0-9]+', 'at 0xadd7e5500000'),
            (r'\[((lir|hir|llvm|mips|ia32), ?)*(lir|hir|llvm|mips|ia32)\]',
             '[]'),
        )
        for pattern, replacement in substitutions:
            text = re.sub(pattern, replacement, text)
        return text
    return s


def canonical_dict(data):
    """Strip run-dependent details from one Jupyter output dictionary.

    The dictionary is modified in place and also returned.  The 'text'
    entry and every value of the 'data' entry go through canonicalize();
    'text/plain' is dropped from 'data' when 'text/html' is present too.
    The 'transient', 'execution_count' and 'traceback' entries are removed.

    An output recording SystemExit with value '77' terminates this script
    with exit status 77.
    """
    if 'text' in data:
        data['text'] = canonicalize(data['text'])

    if 'data' in data:
        bundle = data['data']
        if "text/plain" in bundle and "text/html" in bundle:
            bundle.pop("text/plain")
        for mime in tuple(bundle):
            bundle[mime] = canonicalize(bundle[mime])

    error_name = data.get('ename')

    if error_name == 'SystemExit' and data['evalue'] == '77':
        # sys.exit(77) is used to Skip the test.
        sys.exit(77)

    if error_name == 'CalledProcessError':
        # CalledProcessError message has a final dot in Python 3.6
        data['evalue'] = re.sub(r"(' returned non-zero exit status \d+)\.",
                                r'\1', data['evalue'])

    for volatile in ('transient', 'execution_count', 'traceback'):
        data.pop(volatile, None)
    return data


def compare_outputs(ref, test):
    """Check that two lists of outputs are equivalent and report the result.

    Every output of both lists is passed through canonical_dict() first
    (which modifies the dictionaries in place).  The canonical lists are
    then pretty-printed and compared as text; when they differ, a unified
    diff between the expected and effective outputs is printed.

    ref  -- outputs stored in the notebook (the expected ones)
    test -- outputs produced by running the cell (the effective ones)

    Return True when the outputs match, False otherwise (including when
    the two lists do not have the same length).
    """
    cref = [canonical_dict(out) for out in ref]
    ctest = [canonical_dict(out) for out in test]

    ok = len(cref) == len(ctest)
    if not ok:
        print("output length mismatch (expected {}, got {})"
              .format(len(cref), len(ctest)))

    # There can be several outputs.  For instance when the cell both
    # prints a result (goes to "stdout") and displays an automaton
    # (goes to "data").
    exp = pprint.pformat(cref, width=132)
    eff = pprint.pformat(ctest, width=132)
    # Make sure both texts end with exactly one newline so that the diff
    # below is well formed.  (Testing the last character requires
    # endswith() or s[-1:]; s[:-1] is everything *but* the last one.)
    if not exp.endswith('\n'):
        exp += '\n'
    if not eff.endswith('\n'):
        eff += '\n'

    if exp == eff:
        return ok

    print(''.join(diff(exp.splitlines(True), eff.splitlines(True),
                       fromfile='expected', tofile='effective')))
    return False


def _wait_for_ready_backport(kc):
    """Backport BlockingKernelClient.wait_for_ready from IPython 3"""
    # Wait for kernel info reply on shell channel
    kc.kernel_info()
    while True:
        msg = kc.get_shell_msg(block=True, timeout=30)
        if msg['msg_type'] == 'kernel_info_reply':
            break
    # Flush IOPub channel
    while True:
        try:
            msg = kc.get_iopub_msg(block=True, timeout=1)
        except Empty:
            break


def run_cell(kc, cell):
    """Execute one cell on the kernel client and return its outputs.

    The cell source is submitted with kc.execute(), then IOPub messages
    are read until the kernel reports the 'idle' execution state.  Each
    retained message content is tagged with an 'output_type' entry holding
    the message type and appended to the result list.  Afterwards the
    shell channel is drained.

    Raise RuntimeError("Kernel died") when a channel stays silent and the
    kernel is no longer alive.
    """
    # Message types that never contribute an output.
    ignored_types = ('status', 'pyin', 'execute_input',
                     'comm_open', 'comm_msg')

    kc.execute(cell.source)
    collected = []

    busy = True
    while busy:
        try:
            message = kc.get_iopub_msg(timeout=1)
        except Empty:
            if kc.is_alive():
                continue
            raise RuntimeError("Kernel died")

        kind = message['msg_type']
        payload = message['content']

        if kind == 'status' and payload['execution_state'] == 'idle':
            # The kernel is done with this cell.
            busy = False
        elif kind in ignored_types:
            pass
        elif kind == 'clear_output':
            # Forget everything produced so far.
            collected = []
        elif kind == 'stream' and 'Widget' in payload['text']:
            pass
        elif (kind == 'stream' and collected
              and collected[-1]['output_type'] == 'stream'
              and collected[-1]['name'] == payload['name']):
            # If the last stream had the same name, then outputs are
            # appended.
            collected[-1]['text'] += payload['text']
        else:
            payload['output_type'] = kind
            collected.append(payload)

    # Flush shell channel
    drained = False
    while not drained:
        try:
            kc.get_shell_msg(timeout=0.1)
        except Empty:
            if not kc.is_alive():
                raise RuntimeError("Kernel died")
            drained = True
    return collected


def test_notebook(ipynb):
    """Run the code cells of a notebook and compare them to stored outputs.

    A fresh kernel is started, every code cell whose source does not start
    with '#DONTCHECK' is executed with run_cell(), and the outputs are
    checked against those recorded in the notebook with compare_outputs().
    One line per cell is printed, labelled with the most recent markdown
    heading, followed by a summary.

    ipynb -- path of the notebook file to test

    The kernel and its channels are always shut down, even when an
    exception (or a sys.exit() raised while comparing outputs) interrupts
    the run.  The process exits with status 1 if any cell failed to run or
    produced mismatching output.
    """
    with open(ipynb, encoding='utf-8') as f:
        nb = nbformat.reads_json(f.read())
    km = KernelManager()
    # Do not save the history to disk, as it can yield spurious lock errors.
    # See https://github.com/ipython/ipython/issues/2845
    km.start_kernel(extra_arguments=['--HistoryManager.hist_file=:memory:',
                                     '--quiet'])

    successes = 0
    failures = 0
    errors = 0
    kc = None
    try:
        kc = km.client()
        kc.start_channels()

        try:
            kc.wait_for_ready(timeout=30)
        except AttributeError:
            # Older clients have no wait_for_ready().
            _wait_for_ready_backport(kc)

        # Label used in the report; stays empty until a markdown heading
        # is met, so that a leading code cell does not hit an unbound name.
        title = ''
        for i, cell in enumerate(nb.cells):
            if cell.cell_type == 'markdown' and cell.source.startswith("#"):
                title = re.sub(r'^#+ ?', '', cell.source.splitlines()[0])
            if (cell.cell_type != 'code'
                    or cell.source.startswith('#DONTCHECK')):
                continue
            try:
                outs = run_cell(kc, cell)
            except Exception as e:
                print("failed to run cell:", repr(e))
                # Cells expose their code as 'source', as used above.
                print(cell.source)
                errors += 1
                continue

            failed = not compare_outputs(cell.outputs, outs)
            print("{: <30} {: >2}: ".format(title, i), end="")
            if failed:
                print("FAIL")
                failures += 1
            else:
                print("OK")
                successes += 1

        print("tested notebook %s" % ipynb)
        print("    %3i cells successfully replicated" % successes)
        if failures:
            print("    %3i cells mismatched output" % failures)
        if errors:
            print("    %3i cells failed to complete" % errors)
    finally:
        # Release the kernel whatever happened above, so that retries and
        # early exits do not leave kernel processes behind.
        if kc is not None:
            kc.stop_channels()
        km.shutdown_kernel()
        del km
    if failures or errors:
        sys.exit(1)


if __name__ == '__main__':
    # Test every notebook given on the command line.  Each notebook gets up
    # to three attempts when the kernel dies; any other error, or running
    # out of attempts, aborts the script with that error.
    for ipynb in sys.argv[1:]:
        tries = 3
        while tries:
            print("testing %s" % ipynb)
            try:
                test_notebook(ipynb)
                break
            except RuntimeError as e:
                # If the Kernel dies, try again.  It seems we have spurious
                # failures when multiple instances of jupyter start in
                # parallel.
                if 'Kernel died' not in str(e):
                    raise
                tries -= 1
                if not tries:
                    # Out of attempts: propagate the failure instead of
                    # silently moving on and exiting with status 0.
                    raise
                s = random.randint(1, 5)
                print("trying again in", s, "seconds...")
                time.sleep(s)