# ignore
from bookutils import print_content

# ignore
import inspect

# ignore
_, first_lineno = inspect.getsourcelines(middle)
middle_source = inspect.getsource(middle)
print_content(middle_source, '.py', start_line_number=first_lineno)

708  def middle(x, y, z):  # type: ignore
709      if y < z:
710          if x < y:
711              return y
712          elif x < z:
713              return y
714      else:
715          if x > y:
716              return y
717          elif x > z:
718              return x
719      return z

middle(4, 5, 6)

5

middle(2, 1, 3)

1

def middle_sort_of_fixed(x, y, z):  # type: ignore
    """Return `x` unconditionally; `y` and `z` are ignored."""
    return x

middle_sort_of_fixed(2, 1, 3)

2

def middle_test(x: int, y: int, z: int) -> None:
    """Assert that `middle(x, y, z)` equals the median of `x`, `y`, and `z`."""
    result = middle(x, y, z)
    median = sorted((x, y, z))[1]
    assert result == median

with ExpectError():
    middle_test(2, 1, 3)

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_84986/3661663124.py", line 2, in <module>
    middle_test(2, 1, 3)
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_84986/40742806.py", line 3, in middle_test
    assert m == sorted([x, y, z])[1]
           ^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError (expected)

middle_debugger = OchiaiDebugger()

for x, y, z in MIDDLE_PASSING_TESTCASES + MIDDLE_FAILING_TESTCASES:
    with middle_debugger:
        middle_test(x, y, z)

middle_debugger

 708 def middle(x, y, z):  # type: ignore

 709     if y < z:

 710         if x < y:

 711             return y

 712         elif x < z:

 713             return y

 714     else:

 715         if x > y:

 716             return y

 717         elif x > z:

# ignore
location = middle_debugger.rank()[0]
(func_name, lineno) = location
lines, first_lineno = inspect.getsourcelines(middle)
print(lineno, end="")
print_content(lines[lineno - first_lineno], '.py')

713            return y

# ignore
middle_debugger.suspiciousness(location)

0.9667364890456637

string.ascii_letters

'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

len(string.ascii_letters + '_') * \
  len(string.ascii_letters + '_' + string.digits) * \
  len(string.ascii_letters + '_' + string.digits)

210357

def middle_tree() -> ast.AST:
    """Parse the current source of `middle()` and return the resulting AST."""
    middle_code = inspect.getsource(middle)
    return ast.parse(middle_code)

show_ast(middle_tree())

print(ast.dump(middle_tree()))

Module(body=[FunctionDef(name='middle', args=arguments(posonlyargs=[], args=[arg(arg='x'), arg(arg='y'), arg(arg='z')], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[If(test=Compare(left=Name(id='y', ctx=Load()), ops=[Lt()], comparators=[Name(id='z', ctx=Load())]), body=[If(test=Compare(left=Name(id='x', ctx=Load()), ops=[Lt()], comparators=[Name(id='y', ctx=Load())]), body=[Return(value=Name(id='y', ctx=Load()))], orelse=[If(test=Compare(left=Name(id='x', ctx=Load()), ops=[Lt()], comparators=[Name(id='z', ctx=Load())]), body=[Return(value=Name(id='y', ctx=Load()))], orelse=[])])], orelse=[If(test=Compare(left=Name(id='x', ctx=Load()), ops=[Gt()], comparators=[Name(id='y', ctx=Load())]), body=[Return(value=Name(id='y', ctx=Load()))], orelse=[If(test=Compare(left=Name(id='x', ctx=Load()), ops=[Gt()], comparators=[Name(id='z', ctx=Load())]), body=[Return(value=Name(id='x', ctx=Load()))], orelse=[])])]), Return(value=Name(id='z', ctx=Load()))], decorator_list=[], type_params=[])], type_ignores=[])

ast.dump(middle_tree().body[0].body[0].body[0].body[0])  # type: ignore

"Return(value=Name(id='y', ctx=Load()))"

# ignore
from typing import Any, Callable, Optional, Type, Tuple
from typing import Dict, Union, Set, List, cast

class StatementVisitor(NodeVisitor):
    """Visit all statements within function defs in an AST.

    After `visit(tree)`, `statements` holds pairs (`node`, `func_name`):
    every element found in a `body` or `orelse` attribute of a visited node,
    paired with the name of the outermost enclosing function
    (or "" if there is none).
    """

    def __init__(self) -> None:
        # Collected (node, function name) pairs, in order of discovery
        self.statements: List[Tuple[ast.AST, str]] = []
        # Name of the outermost function being visited; "" outside functions
        self.func_name = ""
        # Pairs already collected; used to skip duplicates
        self.statements_seen: Set[Tuple[ast.AST, str]] = set()
        super().__init__()

    def add_statements(self, node: ast.AST, attr: str) -> None:
        """Record all elements of `node.<attr>` (if present) as statements."""
        elems: List[ast.AST] = getattr(node, attr, [])
        if not isinstance(elems, list):
            # Single-valued attribute: treat it as a one-element list
            elems = [elems]  # type: ignore

        for elem in elems:
            stmt = (elem, self.func_name)
            if stmt in self.statements_seen:
                continue

            self.statements.append(stmt)
            self.statements_seen.add(stmt)

    def visit_node(self, node: ast.AST) -> None:
        """Record the `body` and `orelse` children of `node`."""
        # Any node other than the ones listed below
        self.add_statements(node, 'body')
        self.add_statements(node, 'orelse')

    def visit_Module(self, node: ast.Module) -> None:
        # Module children are defs, classes and globals - don't add
        super().generic_visit(node)

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        # Class children are defs and globals - don't add
        super().generic_visit(node)

    def generic_visit(self, node: ast.AST) -> None:
        self.visit_node(node)
        super().generic_visit(node)

    def visit_FunctionDef(self,
                          node: Union[ast.FunctionDef, ast.AsyncFunctionDef]) -> None:
        """Record the statements of `node` under the outermost function name."""
        enclosing_name = self.func_name
        if not enclosing_name:
            self.func_name = node.name

        self.visit_node(node)
        super().generic_visit(node)

        # Restore (rather than clear) the name: after a nested function,
        # the remaining statements still belong to the enclosing function.
        self.func_name = enclosing_name

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        return self.visit_FunctionDef(node)

def all_statements_and_functions(tree: ast.AST, 
                                 tp: Optional[Type] = None) -> \
                                 List[Tuple[ast.AST, str]]:
    """
    Collect the statements of `tree` as pairs (`statement`, `function`).
    With `tp` set, only statements that are instances of `tp` are returned.
    """

    collector = StatementVisitor()
    collector.visit(tree)

    if tp is None:
        return collector.statements

    return [pair for pair in collector.statements if isinstance(pair[0], tp)]

def all_statements(tree: ast.AST, tp: Optional[Type] = None) -> List[ast.AST]:
    """
    Collect the statements of `tree`, without their function names.
    With `tp` set, only statements that are instances of `tp` are returned.
    """

    pairs = all_statements_and_functions(tree, tp)
    return [stmt for stmt, _ in pairs]

all_statements(middle_tree(), ast.Return)

[<ast.Return at 0x10faea510>,
 <ast.Return at 0x10f996cd0>,
 <ast.Return at 0x10f995c10>,
 <ast.Return at 0x10f996c90>,
 <ast.Return at 0x10faea810>]

all_statements_and_functions(middle_tree(), ast.If)

[(<ast.If at 0x10fa16a90>, 'middle'),
 (<ast.If at 0x10fa17890>, 'middle'),
 (<ast.If at 0x10fa17ed0>, 'middle'),
 (<ast.If at 0x10fa15150>, 'middle'),
 (<ast.If at 0x10fa15a50>, 'middle')]

random_node = random.choice(all_statements(middle_tree()))
ast.unparse(random_node)

'return y'

class StatementMutator(NodeTransformer):
    """Mutate statements in an AST for automated repair."""

    def __init__(self,
                 suspiciousness_func:
                     Optional[Callable[[Tuple[Callable, int]], float]] = None,
                 source: Optional[List[ast.AST]] = None,
                 log: Union[bool, int] = False) -> None:
        """
        Set up the mutator.
        `suspiciousness_func` maps a location (function, line_number)
        to a suspiciousness value between 0 and 1.0; without it,
        every location is rated 1.0.
        `source` is the list of statements mutations may draw from.
        With `log` greater than 1, the source statements are printed.
        """

        super().__init__()
        self.log = log

        def uniform_suspiciousness(location: Tuple[Callable, int]) -> float:
            # Fallback rating: every location is equally suspicious
            return 1.0

        self.suspiciousness_func: Callable = (
            uniform_suspiciousness if suspiciousness_func is None
            else suspiciousness_func)

        self.source = [] if source is None else source
        self.mutations = 0

        if self.log > 1:
            for index, candidate in enumerate(self.source):
                print(f"Source for repairs #{index}:")
                print_content(ast.unparse(candidate), '.py')
                print()
                print()

class StatementMutator(StatementMutator):
    def node_suspiciousness(self, stmt: ast.AST, func_name: str) -> float:
        """
        Rate `stmt` (located in function `func_name`) via `suspiciousness_func`.
        Nodes without a line number, and locations rated `None`, yield 0.0.
        """
        if hasattr(stmt, 'lineno'):
            location = (func_name, stmt.lineno)
            score = self.suspiciousness_func(location)
            # A rating of `None` stands for "not executed"
            return 0.0 if score is None else score

        warnings.warn(f"{self.format_node(stmt)}: Expected line number")
        return 0.0

    def format_node(self, node: ast.AST) -> str:  # type: ignore
        ...

class StatementMutator(StatementMutator):
    def node_to_be_mutated(self, tree: ast.AST) -> ast.AST:
        """
        Pick one statement of `tree` at random, weighted by suspiciousness.
        If all weights are zero, the choice is uniform.
        """
        candidates = all_statements_and_functions(tree)
        assert len(candidates) > 0, "No statements"

        weights = []
        for stmt, func_name in candidates:
            weights.append(self.node_suspiciousness(stmt, func_name))
        nodes = [stmt for stmt, _ in candidates]

        if self.log > 1:
            print("Weights:")
            for weight, (node, _) in zip(weights, candidates):
                print(f"{weight:.2} {self.format_node(node)}")

        if sum(weights) != 0.0:
            return random.choices(nodes, weights=weights)[0]

        # No suspicious line
        return random.choice(nodes)

RE_SPACE = re.compile(r'[ \t\n]+')

class StatementMutator(StatementMutator):
    def choose_op(self) -> Callable:
        """Pick one of the operators `insert`, `swap`, `delete` at random."""
        operators = [self.insert, self.swap, self.delete]
        return random.choice(operators)

    def visit(self, node: ast.AST) -> ast.AST:
        """
        Visit the children of `node` first; then, if `node` is flagged
        with `mutate_me`, apply a random operator and return its result.
        Unflagged nodes are returned unchanged.
        """
        super().visit(node)  # Children first

        if node.mutate_me:  # type: ignore
            operator = self.choose_op()
            mutated = operator(node)
            self.mutations += 1

            if self.log:
                label = operator.__name__ + ':'
                print(f"{node.lineno:4}:{label:7} "  # type: ignore
                      f"{self.format_node(node)} "
                      f"becomes {self.format_node(mutated)}")

            return mutated

        return node

if P:
    BODY

class StatementMutator(StatementMutator):
    def choose_statement(self) -> ast.AST:
        """Return a deep copy of a randomly picked statement from `source`."""
        picked = random.choice(self.source)
        return copy.deepcopy(picked)

class StatementMutator(StatementMutator):
    def swap(self, node: ast.AST) -> ast.AST:
        """Return a random statement from `source` as replacement for `node`"""
        replacement = self.choose_statement()
        if not isinstance(replacement, ast.stmt):
            return replacement

        # Only the header of a compound statement is taken over:
        # the source `if P: X` is added as `if P: pass`
        emptied = {'body': [ast.Pass()], 'orelse': [], 'finalbody': []}
        for attr, value in emptied.items():
            if hasattr(replacement, attr):
                setattr(replacement, attr, value)

        return replacement

if P:
    BODY

class StatementMutator(StatementMutator):
    def insert(self, node: ast.AST) -> Union[ast.AST, List[ast.AST]]:
        """
        Combine `node` with a random statement from `source`.
        A compound statement wraps `node` as its body; any other
        statement is placed next to `node`.
        """
        addition = self.choose_statement()

        wraps_node = isinstance(addition, ast.stmt) and hasattr(addition, 'body')
        if wraps_node:
            # `if P: X` from the source becomes `if P: <node>`
            addition.body = [node]  # type: ignore
            for attr in ('orelse', 'finalbody'):
                if hasattr(addition, attr):
                    setattr(addition, attr, [])
            return addition

        if not isinstance(node, ast.Return):
            return [node, addition]

        # Nothing goes after a `return`: a new `return` replaces it,
        # anything else is placed in front of it
        if isinstance(addition, ast.Return):
            return addition
        return [addition, node]

if P:
    BODY

class StatementMutator(StatementMutator):
    def delete(self, node: ast.AST) -> Optional[Union[ast.AST, List[ast.AST]]]:
        """
        Delete `node`.

        Returns the replacement for `node`:
        * for a node with a non-empty `body`, `orelse`, or `finalbody`,
          the contents (a list) of one such branch, chosen at random;
        * for any other statement, a `pass` statement at the same location;
        * for anything else, `None`.
        """

        branches = [attr for attr in ['body', 'orelse', 'finalbody']
                    if hasattr(node, attr) and getattr(node, attr)]
        if branches:
            # Replace `if P: S` by `S`
            branch = random.choice(branches)
            return getattr(node, branch)

        if isinstance(node, ast.stmt):
            # Avoid empty bodies; make this a `pass` statement
            new_node = ast.Pass()
            ast.copy_location(new_node, node)
            return new_node

        return None  # Just delete

quiz("Why are statements replaced by `pass` rather than deleted?",
     [
         "Because `if P: pass` is valid Python, while `if P:` is not",
         "Because in Python, bodies for `if`, `while`, etc. cannot be empty",
         "Because a `pass` node makes a target for future mutations",
         "Because it causes the tests to pass"
     ], '[3 ^ n for n in range(3)]')

class StatementMutator(StatementMutator):
    # Maximum number of source characters shown per node by `format_node()`
    NODE_MAX_LENGTH = 20

    def format_node(self, node: ast.AST) -> str:
        """
        Return a string representation for `node`.

        `None` is shown as "None"; a list of nodes as its formatted
        elements joined by "; ". Any other node is unparsed, with
        whitespace runs collapsed into single spaces, and returned as
        `repr()` of the result. Text longer than `NODE_MAX_LENGTH` is cut
        to that length and marked with a trailing "...".
        """
        if node is None:
            return "None"

        if isinstance(node, list):
            return "; ".join(self.format_node(elem) for elem in node)

        s = RE_SPACE.sub(' ', ast.unparse(node)).strip()
        # Add the ellipsis only if text is actually cut off
        if len(s) > self.NODE_MAX_LENGTH:
            s = s[:self.NODE_MAX_LENGTH] + "..."
        return repr(s)

class StatementMutator(StatementMutator):
    def mutate(self, tree: ast.AST) -> ast.AST:
        """
        Return a mutated deep copy of the AST `tree`.
        The given `tree` itself is left unchanged.

        One statement is selected via `node_to_be_mutated()` and flagged;
        `visit()` then applies a random operator to it. If no `source`
        was set so far, the statements of `tree` become the source.
        Issues a warning if no mutation took place.
        """

        assert isinstance(tree, ast.AST)

        if not self.source:
            # Take the statement pool from the pristine input, not from
            # the working copy, whose nodes get altered by the mutation
            self.source = all_statements(tree)

        tree = copy.deepcopy(tree)

        # Flag exactly one node for mutation
        for node in ast.walk(tree):
            node.mutate_me = False  # type: ignore

        node = self.node_to_be_mutated(tree)
        node.mutate_me = True  # type: ignore

        self.mutations = 0

        tree = self.visit(tree)

        if self.mutations == 0:
            warnings.warn("No mutations found")

        # Newly added nodes may lack line numbers
        ast.fix_missing_locations(tree)
        return tree

mutator = StatementMutator(log=True)
for i in range(10):
    new_tree = mutator.mutate(middle_tree())

   9:insert: 'return y' becomes 'return y'
   8:insert: 'if x > y: return y e...' becomes 'if x < y: if x > y: ...'
  12:insert: 'return z' becomes 'if y < z: return z...'
   3:swap:   'if x < y: return y e...' becomes 'return x'
   3:swap:   'if x < y: return y e...' becomes 'return z'
   3:swap:   'if x < y: return y e...' becomes 'return x'
  11:swap:   'return x' becomes 'return y'
  10:insert: 'if x > z: return x...' becomes 'if x > z: return x...'; 'return z'
  12:delete: 'return z' becomes 'pass'
   8:swap:   'if x > y: return y e...' becomes 'if y < z: pass'

print_content(ast.unparse(new_tree), '.py')

def middle(x, y, z):
    if y < z:
        if x < y:
            return y
        elif x < z:
            return y
    elif y < z:
        pass
    return z

WEIGHT_PASSING = 0.99
WEIGHT_FAILING = 0.01

def middle_fitness(tree: ast.AST) -> float:
    """
    Compute fitness of a `middle()` candidate given in `tree`.

    The candidate is compiled and executed in the module globals, where
    it temporarily replaces `middle()`; the previous `middle()` is
    restored before returning, even if the candidate raises.
    The fitness is `WEIGHT_PASSING` times the fraction of
    `MIDDLE_PASSING_TESTCASES` that pass `middle_test()`, plus
    `WEIGHT_FAILING` times the fraction of `MIDDLE_FAILING_TESTCASES`
    that pass. Returns 0.0 if `compile()` raises `ValueError`.
    """

    def pass_ratio(testcases: Any) -> float:
        # Fraction of `testcases` for which `middle_test()` passes
        passed = 0
        for x, y, z in testcases:
            try:
                middle_test(x, y, z)
                passed += 1
            except AssertionError:
                pass
        return passed / len(testcases)

    try:
        code = compile(cast(ast.Module, tree), '<fitness>', 'exec')
    except ValueError:
        return 0.0  # Compilation error

    original_middle = middle
    try:
        # NOTE(review): runs candidate code in the module globals;
        # assumes `tree` comes from a trusted source - confirm with callers
        exec(code, globals())

        # Test how many of the passing and the failing runs pass
        passing_ratio = pass_ratio(MIDDLE_PASSING_TESTCASES)
        failing_ratio = pass_ratio(MIDDLE_FAILING_TESTCASES)
    finally:
        # Always bring back the original `middle()`
        globals()['middle'] = original_middle

    return (WEIGHT_PASSING * passing_ratio +
            WEIGHT_FAILING * failing_ratio)

middle_fitness(middle_tree())

0.99

middle_fitness(ast.parse("def middle(x, y, z): return x"))

0.4258

middle_fixed_source = \
    inspect.getsource(middle_fixed).replace('middle_fixed', 'middle').strip()

middle_fitness(ast.parse(middle_fixed_source))

1.0

POPULATION_SIZE = 40
middle_mutator = StatementMutator()

MIDDLE_POPULATION = [middle_tree()] + \
    [middle_mutator.mutate(middle_tree()) for i in range(POPULATION_SIZE - 1)]

MIDDLE_POPULATION.sort(key=middle_fitness, reverse=True)

print(ast.unparse(MIDDLE_POPULATION[0]),
      middle_fitness(MIDDLE_POPULATION[0]))

def middle(x, y, z):
    if y < z:
        if x < y:
            return y
        elif x < z:
            return y
    elif x > y:
        return y
    elif x > z:
        return x
    return z 0.99

print(ast.unparse(MIDDLE_POPULATION[-1]),
      middle_fitness(MIDDLE_POPULATION[-1]))

def middle(x, y, z):
    if y < z:
        if x < y:
            return y
        elif x < z:
            return y
    else:
        return y
    return z 0.5445

def evolve_middle() -> None:
    """
    Evolve `MIDDLE_POPULATION` by one generation: add one mutant of a
    randomly chosen member per existing member, then keep only the
    fittest candidates so that the population size stays the same.
    """
    global MIDDLE_POPULATION

    mutator = StatementMutator(source=all_statements(middle_tree()))
    size = len(MIDDLE_POPULATION)

    offspring: List[ast.AST] = []
    for _ in range(size):
        parent = random.choice(MIDDLE_POPULATION)
        child = mutator.mutate(parent)
        offspring.append(child)

    MIDDLE_POPULATION.extend(offspring)
    MIDDLE_POPULATION.sort(key=middle_fitness, reverse=True)
    MIDDLE_POPULATION = MIDDLE_POPULATION[:size]

evolve_middle()

tree = MIDDLE_POPULATION[0]
print(ast.unparse(tree), middle_fitness(tree))

def middle(x, y, z):
    if y < z:
        if x < y:
            return y
        elif x < z:
            return y
    elif x > y:
        return y
    elif x > z:
        return x
    return z 0.99

# docassert
assert middle_fitness(tree) < 1.0

for i in range(50):
    evolve_middle()
    best_middle_tree = MIDDLE_POPULATION[0]
    fitness = middle_fitness(best_middle_tree)
    print(f"\rIteration {i:2}: fitness = {fitness}  ", end="")
    if fitness >= 1.0:
        break

Iteration  0: fitness = 0.99  
Iteration  1: fitness = 1.0

# docassert
assert middle_fitness(best_middle_tree) >= 1.0

print_content(ast.unparse(best_middle_tree), '.py', start_line_number=1)

 1  def middle(x, y, z):
 2      if y < z:
 3          if x < y:
 4              if x < z:
 5                  return y
 6          elif x < z:
 7              return x
 8      elif x > y:
 9          return y
10      else:
11          if x > z:
12              return x
13          return z
14      return z

original_middle = middle
code = compile(cast(ast.Module, best_middle_tree), '<string>', 'exec')
exec(code, globals())

for x, y, z in MIDDLE_PASSING_TESTCASES + MIDDLE_FAILING_TESTCASES:
    middle_test(x, y, z)

middle = original_middle

quiz("Some of the lines in our fix candidate are redundant. "
     "Which are these?",
    [
        "Line 3: `if x < y:`",
        "Line 4: `if x < z:`",
        "Line 5: `return y`",
        "Line 13: `return z`"
    ], '[eval(chr(100 - x)) for x in [48, 50]]')

middle_lines = ast.unparse(best_middle_tree).strip().split('\n')

def test_middle_lines(lines: List[str]) -> None:
    """Parse `lines` as source code; assert that its fitness is below 1.0."""
    candidate = ast.parse("\n".join(lines))
    assert middle_fitness(candidate) < 1.0  # "Fail" only while fitness is 1.0

with DeltaDebugger() as dd:
    test_middle_lines(middle_lines)

reduced_lines = dd.min_args()['lines']

reduced_source = "\n".join(reduced_lines)

repaired_source = ast.unparse(ast.parse(reduced_source))  # normalize
print_content(repaired_source, '.py')

def middle(x, y, z):
    if y < z:
        if x < y:
            return y
        elif x < z:
            return x
    elif x > y:
        return y
    elif x > z:
        return x
    return z

# docassert
assert len(reduced_lines) < len(middle_lines)

original_source = ast.unparse(ast.parse(middle_source))  # normalize

for patch in diff(original_source, repaired_source):
    print_patch(patch)

@@ -87,37 +87,37 @@
  x < z:

-            return y

+            return x

     elif

crossover = CrossoverOperator()
crossover.crossover(tree_p1, tree_p2)

# Sample input for the crossover demonstration: the code shown here only
# parses its source (`ast.parse(inspect.getsource(p1))`) and never calls it.
# Documented with comments rather than a docstring, as a docstring would
# become part of the parsed function body.
def p1():  # type: ignore
    if True:
        print(1)
        print(2)
        print(3)

# Second sample input for the crossover demonstration; like `p1()`, the code
# shown here only parses its source and never calls it.
# NOTE(review): `a`, `b`, `c`, `d` are not defined in the visible code, so
# calling `p2()` would presumably raise `NameError` - confirm before reuse.
def p2():  # type: ignore
    if True:
        print(a)
        print(b)
    else:
        print(c)
        print(d)

crossover = CrossoverOperator()
tree_p1 = ast.parse(inspect.getsource(p1))
tree_p2 = ast.parse(inspect.getsource(p2))
crossover.crossover(tree_p1, tree_p2);

print_content(ast.unparse(tree_p1), '.py')

def p1():
    if True:
        print(c)
        print(d)
    else:
        print(a)
        print(b)

print_content(ast.unparse(tree_p2), '.py')

def p2():
    if True:
    else:
        print(1)
        print(2)
        print(3)

middle_t1, middle_t2 = crossover.crossover(middle_tree(),
                                          ast.parse(inspect.getsource(p2)))

print_content(ast.unparse(middle_t1), '.py')

def middle(x, y, z):
    if y < z:
        print(c)
        print(d)
    else:
        print(a)
        print(b)
    return z

print_content(ast.unparse(middle_t2), '.py')

def p2():
    if True:
        if x > y:
            return y
        elif x > z:
            return x
    elif x < y:
        return y
    elif x < z:
        return y

debugger = OchiaiDebugger()
with debugger:
    <passing test>
with debugger:
    <failing test>
...
repairer = Repairer(debugger)
repairer.repair()

repairer = Repairer(middle_debugger, log=True)

Target code to be repaired:
def middle(x, y, z):
    if y < z:
        if x < y:
            return y
        elif x < z:
            return y
    elif x > y:
        return y
    elif x > z:
        return x
    return z

best_tree, fitness = repairer.repair()

Evolving population: iteration   0/100 fitness = 1.0   
Best code (fitness = 1.0):
def middle(x, y, z):
    if y < z:
        if x < y:
            return y
        elif x < z:
            return x
    elif x > y:
        return y
    elif x > z:
        return x
    return z

Reduced code (fitness = 1.0):
def middle(x, y, z):
    if y < z:
        if x < y:
            return y
        elif x < z:
            return x
    elif x > y:
        return y
    elif x > z:
        return x
    return z

print_content(ast.unparse(best_tree), '.py')

def middle(x, y, z):
    if y < z:
        if x < y:
            return y
        elif x < z:
            return x
    elif x > y:
        return y
    elif x > z:
        return x
    return z

fitness

1.0

# docassert
assert fitness >= 1.0

# Return `s` without the characters between `<` and `>`, tracking with
# `quote` whether a quoted section is open.
# NOTE(review): in the third condition, `and` binds tighter than `or`, so it
# reads `c == '"' or (c == "'" and tag)`: a double quote toggles `quote` even
# outside of tags. This function is the subject of the repair runs that
# follow, so the defect is presumably intentional and is left as is.
# Comments are used instead of a docstring, since a docstring would become
# part of the AST that the repair operates on.
def remove_html_markup(s):  # type: ignore
    tag = False
    quote = False
    out = ""

    for c in s:
        if c == '<' and not quote:
            tag = True
        elif c == '>' and not quote:
            tag = False
        elif c == '"' or c == "'" and tag:
            quote = not quote
        elif not tag:
            out = out + c

    return out

def remove_html_markup_tree() -> ast.AST:
    """Parse the current source of `remove_html_markup()` into a fresh AST."""
    markup_code = inspect.getsource(remove_html_markup)
    return ast.parse(markup_code)

def remove_html_markup_test(html: str, plain: str) -> None:
    """Assert that `remove_html_markup(html)` yields exactly `plain`."""
    result = remove_html_markup(html)
    assert result == plain, \
        f"Got {repr(result)}, expected {repr(plain)}"

REMOVE_HTML_PASSING_TESTCASES[0]

('Sg$VT<fqlui ppzww="!EyHN">J9Ji </fqlui>.)!$', 'Sg$VTJ9Ji .)!$')

html, plain = REMOVE_HTML_PASSING_TESTCASES[0]
remove_html_markup_test(html, plain)

REMOVE_HTML_FAILING_TESTCASES[0]

('3AGe<qcguk yewyq="wA^<S">7"!%H</qcguk>6azh_', '3AGe7"!%H6azh_')

with ExpectError():
    html, plain = REMOVE_HTML_FAILING_TESTCASES[0]
    remove_html_markup_test(html, plain)

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_84986/2578453007.py", line 3, in <module>
    remove_html_markup_test(html, plain)
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_84986/700130947.py", line 3, in remove_html_markup_test
    assert outcome == plain, \
           ^^^^^^^^^^^^^^^^
AssertionError: Got '3AGe7!%H</qcguk>6azh_', expected '3AGe7"!%H6azh_' (expected)

html_debugger = OchiaiDebugger()

for html, plain in (REMOVE_HTML_PASSING_TESTCASES + 
                    REMOVE_HTML_FAILING_TESTCASES):
    with html_debugger:
        remove_html_markup_test(html, plain)

html_debugger

   1 def remove_html_markup(s):  # type: ignore

   2     tag = False

   3     quote = False

   4     out = ""

   5

   6     for c in s:

   7         if c == '<' and not quote:

   8             tag = True

   9         elif c == '>' and not quote:

  10             tag = False

html_repairer = Repairer(html_debugger, log=True)

Target code to be repaired:
def remove_html_markup(s):
    tag = False
    quote = False
    out = ''
    for c in s:
        if c == '<' and (not quote):
            tag = True
        elif c == '>' and (not quote):
            tag = False
        elif c == '"' or (c == "'" and tag):
            quote = not quote
        elif not tag:
            out = out + c
    return out

best_tree, fitness = html_repairer.repair(iterations=20)

Evolving population: iteration   0/20 fitness = 0.99

Evolving population: iteration   1/20 fitness = 0.99

Evolving population: iteration   2/20 fitness = 0.99   
Evolving population: iteration   3/20 fitness = 0.99

Evolving population: iteration   4/20 fitness = 0.99   
Evolving population: iteration   5/20 fitness = 0.99

Evolving population: iteration   6/20 fitness = 0.99   
Evolving population: iteration   7/20 fitness = 0.99

Evolving population: iteration   8/20 fitness = 0.99

Evolving population: iteration   9/20 fitness = 0.99

Evolving population: iteration  10/20 fitness = 0.99

Evolving population: iteration  11/20 fitness = 0.99

Evolving population: iteration  12/20 fitness = 0.99

# docassert
assert fitness < 1.0

quiz("Why couldn't `Repairer()` repair `remove_html_markup()`?",
     [
         "The population is too small!",
         "The suspiciousness is too evenly distributed!",
         "We need more test cases!",
         "We need more iterations!",
         "There is no statement in the source with a correct condition!",
         "The population is too big!",
     ], '5242880 >> 20')

def all_conditions(trees: Union[ast.AST, List[ast.AST]],
                   tp: Optional[Type] = None) -> List[ast.expr]:
    """
    Collect the conditions found in `trees` (a single AST or a list of ASTs).
    With `tp` set, only conditions that are instances of `tp` are returned.
    """

    if isinstance(trees, list):
        forest = trees
    else:
        assert isinstance(trees, ast.AST)
        forest = [trees]

    collector = ConditionVisitor()
    for tree in forest:
        collector.visit(tree)

    if tp is None:
        return collector.conditions

    return [cond for cond in collector.conditions if isinstance(cond, tp)]

class ConditionVisitor(NodeVisitor):
    """
    Collect conditions from an AST: `test` attributes of nodes, operands
    of boolean operators, and operands of `not`. Conditions with the same
    unparsed text are collected only once.
    """

    def __init__(self) -> None:
        # Collected conditions, in order of discovery
        self.conditions: List[ast.expr] = []
        # Unparsed text of the conditions collected so far
        self.conditions_seen: Set[str] = set()
        super().__init__()

    def add_conditions(self, node: ast.AST, attr: str) -> None:
        """Collect the yet unseen conditions stored in `node.<attr>`."""
        found = getattr(node, attr, [])
        candidates = cast(List[ast.expr],
                          found if isinstance(found, list) else [found])

        for candidate in candidates:
            text = ast.unparse(candidate)
            if text in self.conditions_seen:
                continue

            self.conditions.append(candidate)
            self.conditions_seen.add(text)

    def visit_BoolOp(self, node: ast.BoolOp) -> ast.AST:
        # Each operand of `and` / `or` is a condition of its own
        self.add_conditions(node, 'values')
        return super().generic_visit(node)

    def visit_UnaryOp(self, node: ast.UnaryOp) -> ast.AST:
        # Only the operand of `not` counts as a condition
        negated = isinstance(node.op, ast.Not)
        if negated:
            self.add_conditions(node, 'operand')
        return super().generic_visit(node)

    def generic_visit(self, node: ast.AST) -> ast.AST:
        # Any node carrying a `test` attribute contributes it
        if hasattr(node, 'test'):
            self.add_conditions(node, 'test')
        return super().generic_visit(node)

[ast.unparse(cond).strip()
    for cond in all_conditions(remove_html_markup_tree())]

["c == '<' and (not quote)",
 "c == '<'",
 'not quote',
 'quote',
 "c == '>' and (not quote)",
 "c == '>'",
 'c == \'"\' or (c == "\'" and tag)',
 'c == \'"\'',
 'c == "\'" and tag',
 'c == "\'"',
 'tag',
 'not tag']

class ConditionMutator(StatementMutator):
    """Mutate conditions in an AST"""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """
        Set up the mutator; arguments are passed on to the
        `StatementMutator` constructor. The conditions found in `source`
        are kept in `conditions`, and printed if logging is enabled.
        """
        super().__init__(*args, **kwargs)
        self.conditions = all_conditions(self.source)
        if not self.log:
            return

        found = [ast.unparse(cond).strip() for cond in self.conditions]
        print("Found conditions", found)

    def choose_condition(self) -> ast.expr:
        """Return a deep copy of a randomly picked condition from source."""
        picked = random.choice(self.conditions)
        return copy.deepcopy(picked)

class ConditionMutator(ConditionMutator):
    def choose_bool_op(self) -> str:
        """Pick one of 'set', 'not', 'and', 'or' at random."""
        options = ['set', 'not', 'and', 'or']
        return random.choice(options)

    def swap(self, node: ast.AST) -> ast.AST:
        """
        Alter the condition of `node` using a condition from `source`:
        replace it, negate it, or combine it via `and` / `or`.
        Nodes without a `test` attribute are swapped as statements.
        """
        if not hasattr(node, 'test'):
            return super().swap(node)

        node = cast(ast.If, node)

        # A condition is drawn before the operator, whatever gets chosen
        cond = self.choose_condition()
        choice = self.choose_bool_op()

        new_test: ast.expr
        if choice == 'set':
            new_test = cond
        elif choice == 'not':
            new_test = ast.UnaryOp(op=ast.Not(), operand=node.test)
        elif choice in ('and', 'or'):
            connective = ast.And() if choice == 'and' else ast.Or()
            new_test = ast.BoolOp(op=connective, values=[cond, node.test])
        else:
            raise ValueError("Unknown boolean operand")

        node.test = new_test
        return node

mutator = ConditionMutator(source=all_statements(remove_html_markup_tree()),
                           log=True)

Found conditions ["c == '<' and (not quote)", "c == '<'", 'not quote', 'quote', "c == '>' and (not quote)", "c == '>'", 'c == \'"\' or (c == "\'" and tag)', 'c == \'"\'', 'c == "\'" and tag', 'c == "\'"', 'tag', 'not tag']

for i in range(10):
    new_tree = mutator.mutate(remove_html_markup_tree())

   2:insert: 'tag = False' becomes 'for c in s: tag = Fa...'
  10:insert: 'tag = False' becomes 'tag = False'; 'out = out + c'
   8:insert: 'tag = True' becomes 'if c == \'"\' or (c ==...'
  12:insert: 'quote = not quote' becomes 'quote = not quote'; 'tag = True'
  10:delete: 'tag = False' becomes 'pass'
  12:insert: 'quote = not quote' becomes "if c == '>' and (not..."
   3:insert: 'quote = False' becomes 'quote = False'; "out = ''"
  14:swap:   'out = out + c' becomes 'quote = False'
  12:insert: 'quote = not quote' becomes 'for c in s: quote = ...'
   3:delete: 'quote = False' becomes 'pass'

condition_repairer = Repairer(html_debugger,
                              mutator_class=ConditionMutator,
                              log=2)

Target code to be repaired:
def remove_html_markup(s):
    tag = False
    quote = False
    out = ''
    for c in s:
        if c == '<' and (not quote):
            tag = True
        elif c == '>' and (not quote):
            tag = False
        elif c == '"' or (c == "'" and tag):
            quote = not quote
        elif not tag:
            out = out + c
    return out

best_tree, fitness = condition_repairer.repair(iterations=200)

Evolving population: iteration   0/200 fitness = 0.99

Evolving population: iteration   1/200 fitness = 0.99

Evolving population: iteration   2/200 fitness = 0.99   
Evolving population: iteration   3/200 fitness = 0.99

Evolving population: iteration   4/200 fitness = 0.99

Evolving population: iteration   5/200 fitness = 0.99   
Evolving population: iteration   6/200 fitness = 0.99

Evolving population: iteration   7/200 fitness = 0.99

Evolving population: iteration   8/200 fitness = 0.99

Evolving population: iteration   9/200 fitness = 0.99

Evolving population: iteration  10/200 fitness = 0.99

Evolving population: iteration  11/200 fitness = 0.99

repaired_source = ast.unparse(best_tree)

print_content(repaired_source, '.py')

def remove_html_markup(s):
    tag = False
    quote = False
    out = ''
    for c in s:
        if c == '<' and (not quote):
            tag = True
        elif c == '>' and (not quote):
            tag = False
        elif tag and c == '"':
            quote = not quote
        elif not tag:
            out = out + c
    return out

# docassert
# Check that the repair run above ended with a fitness of at least 1.0.
assert fitness >= 1.0

# Unparse the unmodified tree too, so that both sides of the diff below are
# produced by ast.unparse().
original_source = ast.unparse(remove_html_markup_tree())

# Print every patch between the original and the repaired source.
for patch in diff(original_source, repaired_source):
    print_patch(patch)

@@ -210,53 +210,39 @@
 lse

-        elif c == '"' or (c == "'" and tag):

+        elif tag and c == '"':

# NOTE(review): in both quizzes below the last argument is written as an
# expression (1 << 1) instead of a literal - presumably it designates the
# correct option and is kept unobvious on purpose; confirm against quiz().
quiz("Is this actually the best solution?",
    [
        "Yes, sure, of course. Why?",
        "Err - what happened to single quotes?"
    ], 1 << 1)

quiz("Why aren't single quotes handled in the solution?",
    [
        "Because they're not important. "
            "I mean, y'know, who uses 'em anyway?",
        "Because they are not part of our tests? "
            "Let me look up how they are constructed..."
    ], 1 << 1)

# Record one more run under html_debugger: a tag whose single-quoted
# attribute value contains '>'.
# NOTE(review): the arguments are presumably (input, expected output) -
# confirm against remove_html_markup_test().
with html_debugger:
    remove_html_markup_test("<foo quote='>abc'>me</foo>", "me")

# Repeat the repair; condition_repairer was constructed with html_debugger,
# which has now observed the additional run above.
best_tree, fitness = condition_repairer.repair(iterations=200)

Evolving population: iteration   0/200 fitness = 0.99

Evolving population: iteration   1/200 fitness = 0.99   
Evolving population: iteration   2/200 fitness = 1.0   

New best code (fitness = 1.0):

def remove_html_markup(s):
    tag = False
    quote = False
    out = ''
    for c in s:
        if c == '<' and (not quote):
            tag = True
        elif c == '>' and (not quote):
            tag = False
        elif tag and (c == '"' or (c == "'" and tag)):
            quote = not quote
        elif not tag:
            out = out + c
    if not tag:
        tag = False
        return out


Reduced code (fitness = 1.0):
def remove_html_markup(s):
    tag = False
    quote = False
    out = ''
    for c in s:
        if c == '<' and (not quote):
            tag = True
        elif c == '>' and (not quote):
            tag = False
        elif tag and (c == '"' or (c == "'" and tag)):
            quote = not quote
        elif not tag:
            out = out + c
    if not tag:
        return out

print_content(ast.unparse(best_tree), '.py')

def remove_html_markup(s):
    tag = False
    quote = False
    out = ''
    for c in s:
        if c == '<' and (not quote):
            tag = True
        elif c == '>' and (not quote):
            tag = False
        elif tag and (c == '"' or (c == "'" and tag)):
            quote = not quote
        elif not tag:
            out = out + c
    if not tag:
        return out

fitness

1.0

# docassert
assert fitness >= 1.0

from debuggingbook.StatisticalDebugger import OchiaiDebugger

# Usage sketch: TESTCASES and test_foo are placeholders that are not defined
# in this part of the file. Every test is executed inside `with debugger:`.
debugger = OchiaiDebugger()
for inputs in TESTCASES:
    with debugger:
        test_foo(inputs)
...

# Hand the debugger to a Repairer, using its default settings.
repairer = Repairer(debugger)

# repair() yields a pair: an AST (it is passed to ast.unparse() below) and
# the fitness value that goes with it.
tree, fitness = repairer.repair()
print(ast.unparse(tree), fitness)

# ignore
print_content(middle_source, '.py')

def middle(x, y, z):  # type: ignore
    if y < z:
        if x < y:
            return y
        elif x < z:
            return y
    else:
        if x > y:
            return y
        elif x > z:
            return x
    return z

# Start from a new OchiaiDebugger and run every passing and every failing
# middle() test case under it.
middle_debugger = OchiaiDebugger()

for x, y, z in MIDDLE_PASSING_TESTCASES + MIDDLE_FAILING_TESTCASES:
    with middle_debugger:
        middle_test(x, y, z)

# Repair middle() with the Repairer's default settings.
middle_repairer = Repairer(middle_debugger)

tree, fitness = middle_repairer.repair()

# Show the resulting candidate as Python source.
print(ast.unparse(tree))

def middle(x, y, z):
    if y < z:
        if x < y:
            return y
        elif x < z:
            return x
    elif x > y:
        return y
    elif x > z:
        return x
    return z

fitness

1.0

# docassert
assert fitness >= 1.0

# ignore
from ClassDiagram import display_class_hierarchy

# ignore
# Render the class diagram for Repairer, ConditionMutator and
# CrossoverOperator, with the listed methods passed as public_methods.
# NOTE(review): StatementMutator methods are listed although StatementMutator
# is not one of the classes passed in - presumably it is reached as a base
# class of ConditionMutator; confirm.
display_class_hierarchy([Repairer, ConditionMutator, CrossoverOperator],
                        abstract_classes=[
                            NodeVisitor,
                            NodeTransformer
                        ],
                        public_methods=[
                            Repairer.__init__,
                            Repairer.repair,
                            StatementMutator.__init__,
                            StatementMutator.mutate,
                            ConditionMutator.__init__,
                            CrossoverOperator.__init__,
                            CrossoverOperator.crossover,
                        ],
                        project='debuggingbook')

# square_root(0) raises ZeroDivisionError (see the traceback that follows);
# the call is wrapped in ExpectError(), and the traceback is reported as
# "(expected)".
with ExpectError():
    square_root_of_zero = square_root(0)

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_84986/1107282428.py", line 2, in <module>
    square_root_of_zero = square_root(0)
                          ^^^^^^^^^^^^^^
  File "Assertions.ipynb", line 61, in square_root
    guess = (approx + x / approx) / 2
                      ~~^~~~~~~~
ZeroDivisionError: float division by zero (expected)

Repairing Code Automatically

Automatic Code Repairs

The middle() Function

Validated Repairs

Genetic Optimization

A Test Suite

Locating the Defect

Random Code Mutations

Picking Statements

Mutating Statements

Choosing Suspicious Statements to Mutate

Choosing a Mutation Method

Swapping Statements

Inserting Statements

Deleting Statements

Quiz

Helpers

All Together

Fitness

Population

Evolution

Quiz

Simplifying

Crossover

Crossover in Action

A Repairer Class

Repairer in Action

Removing HTML Markup

Quiz

Mutating Conditions

Collecting Conditions

Mutating Conditions

Quiz

Quiz

Limitations

Synopsis

Lessons Learned

Background

Exercises

Exercise 1: Automated Repair Parameters

Exercise 2: Elitism

Exercise 3: Evolving Values

Exercise 4: Evolving Variable Names

Exercise 5: Parallel Repair