Files
knowledge-base/scripts/kb-list-issues.py

116 lines
3.2 KiB
Python

#!/usr/bin/env python3
"""Temporary diagnostic — list all KB issues in full."""
import re
from pathlib import Path
from collections import defaultdict
# Vault root: two directory levels above this script's resolved location.
VAULT = Path(__file__).resolve().parent.parent

# Top-level vault folders that are searched (recursively) for Markdown notes.
INCLUDE_DIRS = [
    'decisions',
    'notes',
    'projects',
    'snippets',
    'daily',
    'claude-memory',
    'templates',
    'scripts',
    'audit',
]

# Directory names whose contents are never scanned.
EXCLUDE = {
    '.git',
    '.obsidian',
    '.claude',
}
# Collect every Markdown file to audit: everything under INCLUDE_DIRS plus
# the two well-known top-level documents.
files = []
for d in INCLUDE_DIRS:
    root = VAULT / d
    if not root.exists():
        continue
    # Sorted because rglob() order depends on the filesystem; a stable order
    # keeps the report (and first-match basename resolution) reproducible.
    for p in sorted(root.rglob('*.md')):
        # Only look at path components *inside* the vault. Checking the
        # absolute parts would drop every note whenever the vault itself
        # lives under a directory named like an EXCLUDE entry.
        if any(part in EXCLUDE for part in p.relative_to(VAULT).parts):
            continue
        # rglob matches on name alone, so a directory called "x.md" would
        # otherwise end up in the list and fail when it is read later.
        if not p.is_file():
            continue
        files.append(p)
for name in ('CLAUDE.md', 'README.md'):
    p = VAULT / name
    if p.is_file():
        files.append(p)
def has_fm(txt):
    """Return True when *txt* opens with a ``---`` delimited frontmatter block.

    The first line must be exactly ``---`` and a later line must be exactly
    ``---`` as well. The closing delimiter is accepted both when it is
    followed by a newline and when it is the very last line of the text
    without a trailing newline.
    """
    if not txt.startswith('---\n'):
        return False
    # Keep the opener's own newline so that an immediately following closing
    # line (empty frontmatter) is found by the same search.
    rest = txt[3:]
    return '\n---\n' in rest or rest.endswith('\n---')
def strip_code(t):
    """Return *t* with Markdown code removed.

    Triple-backtick fenced blocks are deleted first, then single-line
    inline backtick spans.
    """
    for pattern in (r'```[\s\S]*?```', r'`[^`\n]*`'):
        t = re.sub(pattern, '', t)
    return t
def wikilinks(t):
    """Return the targets of all ``[[...]]`` links found outside code in *t*.

    For each link, anything from the first ``|`` (alias) or ``#`` (heading)
    onwards is dropped and the remainder is stripped of surrounding
    whitespace. Links whose target ends up empty are skipped.
    """
    cleaned = strip_code(t)
    targets = (
        link.group(1).partition('|')[0].partition('#')[0].strip()
        for link in re.finditer(r'\[\[([^\]]+)\]\]', cleaned)
    )
    return [name for name in targets if name]
# Index of collected files keyed by filename without extension; used as the
# last-resort lookup when a link target is not a valid path.
basenames = defaultdict(list)
for note_path in files:
    same_stem = basenames[note_path.stem]
    same_stem.append(note_path)
def resolve_wl(target, from_file):
    """Resolve a wikilink *target* found in *from_file* to a file path.

    Lookup order:
      1. targets starting with ``.`` or ``/`` are tried relative to the
         directory of *from_file*;
      2. the target is tried as a path relative to the vault root;
      3. the last path component is looked up in ``basenames`` and the first
         collected file with that stem is returned.

    Returns the matching ``Path``, or ``None`` when nothing matches.
    """
    # Drop only a trailing ".md"; str.replace() would also remove ".md" from
    # the middle of a name.
    tc = target[:-3] if target.endswith('.md') else target
    if tc.startswith(('.', '/')):
        try:
            # NOTE: pathlib discards the left operand when tc is absolute, so
            # a "/"-prefixed target is tested as a filesystem path here; the
            # vault-root lookup below treats it as vault-relative.
            r = (from_file.parent / tc).resolve()
            # Append ".md" instead of with_suffix(): with_suffix would replace
            # everything after the last dot of names such as "v1.2".
            for c in (r.with_name(r.name + '.md'), r):
                if c.is_file() and str(c).endswith('.md'):
                    return c
        except (OSError, RuntimeError, ValueError):
            # Best effort: an unresolvable or malformed relative path simply
            # falls through to the remaining strategies.
            pass
    # Strip leading slashes so the join stays inside the vault; joining an
    # absolute path would silently ignore VAULT.
    g = VAULT / f"{tc.lstrip('/')}.md"
    if g.is_file():
        return g
    b = tc.rsplit('/', 1)[-1]
    if b in basenames:
        return basenames[b][0]
    return None
# Compiled once here instead of being looked up for every line of every file.
_OPEN_TASK_RE = re.compile(r'^\s*-\s*\[\s*\]\s+')  # unchecked "- [ ]" item
_ISO_DATE_RE = re.compile(r'\d{4}-\d{2}-\d{2}')    # YYYY-MM-DD anywhere in line

missing = []                  # vault-relative paths of notes without frontmatter
incoming = defaultdict(set)   # resolved link target -> files that link to it
todos = defaultdict(list)     # vault-relative path -> [(line number, raw line)]
for f in files:
    # Decode explicitly as UTF-8; the platform default encoding would garble
    # non-ASCII text (and therefore link targets) on non-UTF-8 locales.
    txt = f.read_text(encoding='utf-8', errors='ignore')
    rel = str(f.relative_to(VAULT))
    if f.name not in ('README.md', 'CLAUDE.md') and not has_fm(txt):
        missing.append(rel)
    for target in wikilinks(txt):
        r = resolve_wl(target, f)
        # A note linking to itself is not an incoming link; counting it would
        # hide the note from the orphan report.
        if r and r != f:
            incoming[r].add(f)
    for i, line in enumerate(txt.splitlines(), 1):
        if _OPEN_TASK_RE.match(line) and not _ISO_DATE_RE.search(line):
            todos[rel].append((i, line))
# Vault-relative path patterns that are allowed to have no incoming links.
ORPHAN_OK = [
    re.compile(pattern)
    for pattern in (
        r'^daily/.*',
        r'^audit/.*',
        r'^templates/.*',
        r'^notes/claude/.*',
        r'^scripts/.*',
        r'^CLAUDE\.md$',
        r'^README\.md$',
    )
]
def is_ok(rel):
    """Return True when the vault-relative path *rel* matches an ORPHAN_OK pattern."""
    for allowed in ORPHAN_OK:
        if allowed.match(rel):
            return True
    return False
# Orphans: collected files that never appear as a resolved link target and
# whose vault-relative path is not covered by the ORPHAN_OK allow-list.
orphans = [
    rel_path
    for rel_path in (
        str(note.relative_to(VAULT)) for note in files if note not in incoming
    )
    if not is_ok(rel_path)
]
# --- Report -----------------------------------------------------------------
# Section 1: notes without frontmatter.
missing_count = len(missing)
print('=== MISSING FRONTMATTER ({}) ==='.format(missing_count))
for note in missing:
    print(note)
print()

# Section 2: notes nothing links to.
orphan_count = len(orphans)
print('=== ORPHANS ({}) ==='.format(orphan_count))
for note in orphans:
    print(note)
print()

# Section 3: unchecked tasks without a date, files with the most hits first.
todo_total = sum(len(entries) for entries in todos.values())
print('=== UNDATED TODOS ({}) ==='.format(todo_total))
ranked = sorted(todos.items(), key=lambda item: -len(item[1]))
for rel_path, entries in ranked:
    print('FILE:', rel_path, '({})'.format(len(entries)))
    for lineno, text in entries:
        print(' L{}: {}'.format(lineno, text.strip()))