knowledge-base/scripts/kb-list-issues.py

#!/usr/bin/env python3
"""Temporary diagnostic — list all KB issues in full."""
import re
from pathlib import Path
from collections import defaultdict

# Root of the vault: the directory two levels above this script file.
VAULT = Path(__file__).resolve().parent.parent

# Top-level vault folders that are searched recursively for Markdown files.
INCLUDE_DIRS = [
    'decisions',
    'notes',
    'projects',
    'snippets',
    'daily',
    'claude-memory',
    'templates',
    'scripts',
    'audit',
]

# A file is skipped when any component of its path is one of these names.
EXCLUDE = {
    '.git',
    '.obsidian',
    '.claude',
}

# Collect every Markdown file to audit: all *.md files below the included
# folders, plus CLAUDE.md / README.md at the vault root when present.
files = []
for d in INCLUDE_DIRS:
    root = VAULT / d
    if not root.exists():
        continue
    # Sort so the scan order (and therefore which file wins an ambiguous
    # basename lookup) does not depend on filesystem enumeration order.
    for p in sorted(root.rglob('*.md')):
        # Test only the components below the vault root. Checking the absolute
        # path would drop every file whenever the vault itself is stored under
        # a directory with an excluded name (e.g. ~/.claude/...).
        if any(part in EXCLUDE for part in p.relative_to(VAULT).parts):
            continue
        # rglob matches on the name only, so skip directories called "*.md";
        # they cannot be read as text later on.
        if not p.is_file():
            continue
        files.append(p)
for name in ('CLAUDE.md', 'README.md'):
    p = VAULT / name
    if p.is_file():
        files.append(p)

def has_fm(txt):
    """Return True if *txt* starts with a delimited frontmatter block.

    The text must begin with a line that is exactly ``---`` and contain a
    later line that is exactly ``---``. The closing delimiter may be the last
    line of the text even when no newline follows it.

    Args:
        txt: Full text of a note, using ``\\n`` line endings.

    Returns:
        bool: whether an opening and a closing ``---`` line were found.
    """
    if not txt.startswith('---\n'):
        return False
    # Splitting on '\n' (rather than requiring a literal '---\n') also accepts
    # a closing delimiter at end-of-file without a trailing newline.
    return '---' in txt.split('\n')[1:]

def strip_code(t):
    """Return *t* with Markdown code removed.

    Triple-backtick fenced blocks are deleted first, then single-line
    backtick spans; everything else is left untouched.
    """
    # Order matters: fences must go before inline spans.
    code_patterns = (
        r'```[\s\S]*?```',
        r'`[^`\n]*`',
    )
    cleaned = t
    for pattern in code_patterns:
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned

def wikilinks(t):
    """Return the targets of all ``[[...]]`` wikilinks found in *t*.

    Code is removed with ``strip_code`` before searching. For each link the
    alias (after ``|``) and the heading/anchor (after ``#``) are dropped and
    the remainder is stripped of surrounding whitespace; links whose target
    is empty after that are skipped.

    Args:
        t: Markdown text of a note.

    Returns:
        list[str]: link targets in order of appearance (duplicates kept).
    """
    out = []
    for m in re.finditer(r'\[\[([^\]]+)\]\]', strip_code(t)):
        # Inside Markdown tables the alias separator has to be written as
        # "\|"; normalise it so the backslash does not end up in the target.
        inner = m.group(1).replace('\\|', '|')
        tgt = inner.split('|')[0].split('#')[0].strip()
        if tgt:
            out.append(tgt)
    return out

# Index of collected files by stem (file name without extension); several
# files may share one stem, so each entry is a list in collection order.
basenames = defaultdict(list)
for note_path in files:
    same_stem = basenames[note_path.stem]
    same_stem.append(note_path)

def resolve_wl(target, from_file):
    """Resolve a wikilink target to a Markdown file, or return None.

    Lookup order:
      1. If the target starts with ``.`` or ``/`` it is joined to the
         directory of *from_file* and checked on disk.
      2. The target is tried as a path relative to ``VAULT``.
      3. The last path component is looked up in ``basenames``; when several
         files share that stem the first one recorded is returned.

    Args:
        target: Link target text, with or without a trailing ``.md``.
        from_file: Path of the file that contains the link.

    Returns:
        Path of the matching file, or None when nothing matches.
    """
    # Drop only a trailing ".md"; str.replace would also remove ".md" from
    # the middle of a name.
    tc = target[:-3] if target.endswith('.md') else target
    if tc.startswith(('.', '/')):
        # NOTE(review): joining a target that starts with "/" discards
        # from_file.parent, so it is looked up from the filesystem root rather
        # than the vault root - confirm that this is intended.
        try:
            r = (from_file.parent / tc).resolve()
            # Append ".md" instead of using with_suffix(), which would replace
            # whatever follows the last dot in names such as "v1.2 notes".
            for c in (r.parent / (r.name + '.md'), r):
                if c.is_file() and str(c).endswith('.md'):
                    return c
        except (OSError, RuntimeError, ValueError):
            # Best effort: an unresolvable or malformed relative path simply
            # falls through to the vault-wide lookups below.
            pass
    g = VAULT / f'{tc}.md'
    if g.is_file():
        return g
    b = tc.rsplit('/', 1)[-1]
    if b in basenames:
        return basenames[b][0]
    return None

missing = []                 # vault-relative paths of files without frontmatter
incoming = defaultdict(set)  # resolved link target -> files that link to it
todos = defaultdict(list)    # vault-relative path -> [(line number, line text)]

# Compiled once here instead of once per scanned line.
_OPEN_TODO_RE = re.compile(r'^\s*-\s*\[\s*\]\s+')
_ISO_DATE_RE = re.compile(r'\d{4}-\d{2}-\d{2}')

for f in files:
    # Decode explicitly as UTF-8 instead of the platform default encoding;
    # "utf-8-sig" also drops a leading BOM, which would otherwise hide the
    # opening "---" from has_fm(). Undecodable bytes are still ignored.
    txt = f.read_text(encoding='utf-8-sig', errors='ignore')
    rel = str(f.relative_to(VAULT))
    # README.md / CLAUDE.md are exempt from the frontmatter requirement.
    if f.name not in ('README.md', 'CLAUDE.md') and not has_fm(txt):
        missing.append(rel)
    # Record who links to whom; unresolvable links are silently skipped.
    for target in wikilinks(txt):
        r = resolve_wl(target, f)
        if r:
            incoming[r].add(f)
    # An undated TODO is an unchecked "- [ ]" item whose line contains no
    # YYYY-MM-DD date.
    for i, line in enumerate(txt.splitlines(), 1):
        if _OPEN_TODO_RE.match(line) and not _ISO_DATE_RE.search(line):
            todos[rel].append((i, line))

# Patterns for vault-relative paths; is_ok() accepts a path that matches any
# one of them.
ORPHAN_OK = [
    re.compile(expr)
    for expr in (
        r'^daily/.*',
        r'^audit/.*',
        r'^templates/.*',
        r'^notes/claude/.*',
        r'^scripts/.*',
        r'^CLAUDE\.md$',
        r'^README\.md$',
    )
]
def is_ok(rel):
    """Return True when *rel* matches at least one ORPHAN_OK pattern."""
    for allowed in ORPHAN_OK:
        if allowed.match(rel):
            return True
    return False

# An orphan is a collected file that no wikilink resolved to and whose
# vault-relative path is not covered by the ORPHAN_OK allow-list.
orphans = []
for f in files:
    if f in incoming:
        continue
    # Use forward slashes on every platform: the ORPHAN_OK patterns are
    # written with "/", so a backslash-separated Windows path would never
    # match them and allowed files would be reported as orphans.
    rel = f.relative_to(VAULT).as_posix()
    if not is_ok(rel):
        orphans.append(rel)

# Report: three sections, each with its item count in the header line.
undated_total = sum(len(entries) for entries in todos.values())
# Files with the most undated TODOs come first; ties keep insertion order.
todos_by_count = sorted(todos.items(), key=lambda item: len(item[1]), reverse=True)

print('=== MISSING FRONTMATTER ({}) ==='.format(len(missing)))
for path in missing:
    print(path)
print()

print('=== ORPHANS ({}) ==='.format(len(orphans)))
for path in orphans:
    print(path)
print()

print('=== UNDATED TODOS ({}) ==='.format(undated_total))
for path, entries in todos_by_count:
    print('FILE:', path, '({})'.format(len(entries)))
    for line_no, text in entries:
        print('  L{}: {}'.format(line_no, text.strip()))