116 lines
3.2 KiB
Python
116 lines
3.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Temporary diagnostic — list all KB issues in full."""
|
|
import re
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
|
|
# The vault root is the directory one level above the folder holding this script.
_SCRIPT_DIR = Path(__file__).resolve().parent
VAULT = _SCRIPT_DIR.parent

# Top-level vault folders that are searched recursively for Markdown files.
INCLUDE_DIRS = [
    'decisions',
    'notes',
    'projects',
    'snippets',
    'daily',
    'claude-memory',
    'templates',
    'scripts',
    'audit',
]

# Path components that disqualify a file from the scan.
EXCLUDE = {
    '.git',
    '.obsidian',
    '.claude',
}
|
|
|
|
# Collect every Markdown file to audit.  Each directory's matches are sorted
# because rglob() makes no ordering guarantee; sorting keeps the report (and
# any "first match wins" lookup built on this list) stable across runs.
files = []
for d in INCLUDE_DIRS:
    root = VAULT / d
    if not root.is_dir():
        continue
    for p in sorted(root.rglob('*.md')):
        # A directory whose name ends in ".md" matches the glob but cannot be
        # read as a note later on.
        if not p.is_file():
            continue
        # Test only the components below the vault root, so a vault that
        # itself lives under a folder such as ".claude" is not excluded
        # wholesale.
        if EXCLUDE.intersection(p.relative_to(VAULT).parts):
            continue
        files.append(p)

# Root-level documents that live outside the included directories.
for name in ('CLAUDE.md', 'README.md'):
    p = VAULT / name
    if p.is_file():
        files.append(p)
|
|
|
|
def has_fm(txt):
    """Return True if *txt* opens with a frontmatter block.

    A frontmatter block starts with a ``---`` line as the very first line of
    the text and is closed by a later line that is exactly ``---``.  The
    closing fence is accepted whether or not it is followed by a newline, so
    a file that ends right after its frontmatter is still recognised.
    """
    if not txt.startswith('---\n'):
        return False
    # Skip the opening fence and look for a closing fence on its own line.
    # Splitting on '\n' (rather than matching '---\n') also covers a fence
    # that is the last line of a file without a trailing newline.
    return '---' in txt.split('\n')[1:]
|
|
|
|
def strip_code(t):
    """Return *t* with Markdown code removed.

    Fenced blocks delimited by triple backticks are deleted first; after
    that, single-backtick inline spans that stay on one line are deleted.
    """
    for pattern in (r'```[\s\S]*?```', r'`[^`\n]*`'):
        t = re.sub(pattern, '', t)
    return t
|
|
|
|
def wikilinks(t):
    """Return the targets of all ``[[wikilinks]]`` in *t*, in order.

    Links inside code (as removed by ``strip_code``) are ignored.  For each
    link the alias part (``|alias``) and the anchor part (``#...``) are
    dropped and surrounding whitespace is trimmed.  Links whose target ends
    up empty (for example ``[[#heading]]``) are skipped.  Duplicates are
    kept.
    """
    out = []
    for m in re.finditer(r'\[\[([^\]]+)\]\]', strip_code(t)):
        head, pipe, _alias = m.group(1).partition('|')
        # Inside Markdown tables the alias pipe has to be written as "\|";
        # drop that escaping backslash so it does not become part of the
        # target and break resolution.
        if pipe and head.endswith('\\'):
            head = head[:-1]
        tgt = head.split('#')[0].strip()
        if tgt:
            out.append(tgt)
    return out
|
|
|
|
# Index: file stem (name without its suffix) -> every collected path carrying
# that stem, in the order the files were collected.
basenames = defaultdict(list)
for md_file in files:
    basenames[md_file.stem].append(md_file)
|
|
|
|
def resolve_wl(target, from_file):
    """Resolve a wikilink target to a Markdown file, or return None.

    Strategies are tried in this order:
      1. If the target starts with "." or "/", it is joined onto the linking
         file's directory and resolved.
      2. The target is looked up as a path below the vault root.
      3. The last path component is looked up in ``basenames``; the first
         file recorded for that stem wins.

    Args:
        target: Link target string (a trailing ".md" is optional).
        from_file: Path of the file that contains the link.

    Returns:
        The Path of the matching ``.md`` file, or None if nothing matched.
    """
    # Strip only a trailing ".md"; str.replace() would also remove ".md"
    # from the middle of a name.
    tc = target[:-3] if target.endswith('.md') else target
    if tc.startswith(('.', '/')):
        try:
            r = (from_file.parent / tc).resolve()
            # Append ".md" instead of using with_suffix(): a name such as
            # "v1.2-notes" contains a dot that with_suffix() would treat as
            # an extension and replace.
            for c in (r.parent / (r.name + '.md'), r):
                if c.is_file() and str(c).endswith('.md'):
                    return c
        except (OSError, RuntimeError, ValueError):
            # Best effort: a path that cannot be resolved simply falls
            # through to the remaining strategies.
            pass
    g = VAULT / f'{tc}.md'
    if g.is_file():
        return g
    b = tc.rsplit('/', 1)[-1]
    if b in basenames:
        return basenames[b][0]
    return None
|
|
|
|
# An unchecked Markdown task item: "- [ ] ..." at any indentation (the
# brackets may also be empty).
_OPEN_TODO_RE = re.compile(r'^\s*-\s*\[\s*\]\s+')
# A YYYY-MM-DD shaped date anywhere on the line marks the task as dated.
_ISO_DATE_RE = re.compile(r'\d{4}-\d{2}-\d{2}')

missing = []                 # vault-relative paths of files without frontmatter
incoming = defaultdict(set)  # resolved link target -> files that link to it
todos = defaultdict(list)    # vault-relative path -> [(line number, line), ...]

for f in files:
    # Pin the encoding instead of relying on the platform default, which is
    # not UTF-8 everywhere (assumes the notes are UTF-8); undecodable bytes
    # are still dropped rather than aborting the scan.
    txt = f.read_text(encoding='utf-8', errors='ignore')
    rel = str(f.relative_to(VAULT))
    if f.name not in ('README.md', 'CLAUDE.md') and not has_fm(txt):
        missing.append(rel)
    for target in wikilinks(txt):
        r = resolve_wl(target, f)
        # A note that links to itself is not thereby reachable from any other
        # note, so self-links must not count as incoming links.
        if r and r != f:
            incoming[r].add(f)
    for i, line in enumerate(txt.splitlines(), 1):
        if _OPEN_TODO_RE.match(line) and not _ISO_DATE_RE.search(line):
            todos[rel].append((i, line))
|
|
|
|
# Vault-relative paths matching any of these patterns are not reported as
# orphans even when no file links to them.
ORPHAN_OK = [
    re.compile(pattern)
    for pattern in (
        r'^daily/.*',
        r'^audit/.*',
        r'^templates/.*',
        r'^notes/claude/.*',
        r'^scripts/.*',
        r'^CLAUDE\.md$',
        r'^README\.md$',
    )
]


def is_ok(rel):
    """Return True if the vault-relative path *rel* matches an ORPHAN_OK pattern."""
    for allowed in ORPHAN_OK:
        if allowed.match(rel):
            return True
    return False
|
|
|
|
# Files that nothing links to, minus the locations where that is expected.
orphans = []
for f in files:
    if f in incoming:
        continue
    rel = str(f.relative_to(VAULT))
    if is_ok(rel):
        continue
    orphans.append(rel)
|
|
|
|
# The two plain path listings share one layout: a header carrying the count,
# one path per line, then a blank separator line.
for header, entries in (
    ('=== MISSING FRONTMATTER ({}) ===', missing),
    ('=== ORPHANS ({}) ===', orphans),
):
    print(header.format(len(entries)))
    for entry in entries:
        print(entry)
    print()

# Undated TODOs, grouped per file; files with the most items come first.
undated_total = sum(map(len, todos.values()))
print('=== UNDATED TODOS ({}) ==='.format(undated_total))
by_count = sorted(todos.items(), key=lambda item: -len(item[1]))
for f, lines in by_count:
    print('FILE:', f, '({})'.format(len(lines)))
    for ln, line in lines:
        print(' L{}: {}'.format(ln, line.strip()))
|