diff --git a/audit/2026-04-18-health.md b/audit/2026-04-18-health.md new file mode 100644 index 0000000..596be7d --- /dev/null +++ b/audit/2026-04-18-health.md @@ -0,0 +1,141 @@ +--- +date: 2026-04-18 +type: audit +source: kb-health.py +score: 493 +tags: [audit, health, metric] +--- + +# KB health — 2026-04-18 + +**Score (меньше = лучше): `493`** +Проверено файлов: 158 + +## Разбивка + +| Категория | Кол-во | Вес | Штраф | +|---|---:|---:|---:| +| broken_wikilinks | 0 | 10 | 0 | +| broken_paths | 4 | 10 | 40 | +| missing_frontmatter | 70 | 3 | 210 | +| orphan_files | 90 | 2 | 180 | +| undated_todos | 48 | 1 | 48 | +| duplicate_basenames | 3 | 5 | 15 | +| **ИТОГО** | | | **493** | + +## Битые relative-пути + +| Откуда | Путь | +|---|---| +| `decisions/2026-04-14-niikn-openwrt-awg-fix.md` | `/root/.claude/knowledge-base/projects/niikn/openwrt-bypass.md` | +| `decisions/2026-04-14-niikn-openwrt-awg-fix.md` | `/root/.claude/projects/-root/memory/niikn-vpn-status.md` | +| `projects/dttb/spaceweb-dns.md` | `feedback_spaceweb_dns.md` | +| `claude-memory/niikn-vpn-status.md` | `/root/.claude/knowledge-base/projects/niikn/openwrt-bypass.md` | + +## Без frontmatter (70) + +- `decisions/2026-04-17-peredelki-podkop-stability-fix.md` +- `decisions/2026-03-03-mailserver-setup-scenario.md` +- `decisions/2026-04-16-unifi-migration-peredelki.md` +- `decisions/2026-04-14-niikn-openwrt-awg-fix.md` +- `decisions/2026-03-04-matrix-niikn-setup.md` +- `decisions/2026-02-26-clawdbot-129-cliproxy-fix.md` +- `decisions/2026-04-14-openclaw-claude-code-pipeline.md` +- `decisions/2026-04-17-code-server-upgrade.md` +- `notes/2026-02-26-session-summary.md` +- `notes/2026-02-26-knowledge-base-setup.md` +- `notes/2026-02-26-full-session-log.md` +- `notes/2026-02-26-claude-code-session-clawdbot-fix.md` +- `projects/clawdbot-bots.md` +- `projects/bitrix-sites.md` +- `projects/infrastructure-overview.md` +- `projects/homelab-proxmox.md` +- `projects/video-surveillance.md` +- 
`projects/all-projects-summary.md` +- `projects/unresolved-issues.md` +- `projects/nextcloud.md` +- `projects/dttb/agentdvr-home.md` +- `projects/dttb/mailcow-dttb.md` +- `projects/dttb/netbird-inventory.md` +- `projects/dttb/npm-proxy-hosts.md` +- `projects/dttb/gitea.md` +- `projects/dttb/video-surveillance-report.md` +- `projects/dttb/network-topology.md` +- `projects/dttb/server1c.md` +- `projects/dttb/npm-homelab.md` +- `projects/dttb/openclaw.md` +- ... +40 ещё + +## Orphan — без бэклинков (90) + +_Эти файлы никто не упоминает через `[[..]]`. Кандидаты на удаление или добавление ссылок._ + +- `decisions/2026-04-17-peredelki-podkop-stability-fix.md` +- `decisions/2026-03-03-mailserver-setup-scenario.md` +- `decisions/2026-04-16-unifi-migration-peredelki.md` +- `decisions/2026-04-14-niikn-openwrt-awg-fix.md` +- `decisions/2026-03-04-matrix-niikn-setup.md` +- `decisions/2026-02-26-clawdbot-129-cliproxy-fix.md` +- `decisions/README.md` +- `decisions/2026-04-14-openclaw-claude-code-pipeline.md` +- `decisions/2026-04-17-code-server-upgrade.md` +- `notes/2026-02-26-session-summary.md` +- `notes/2026-02-26-knowledge-base-setup.md` +- `notes/README.md` +- `notes/2026-02-26-full-session-log.md` +- `notes/2026-02-26-claude-code-session-clawdbot-fix.md` +- `projects/clawdbot-bots.md` +- `projects/bitrix-sites.md` +- `projects/infrastructure-overview.md` +- `projects/homelab-proxmox.md` +- `projects/video-surveillance.md` +- `projects/all-projects-summary.md` +- `projects/unresolved-issues.md` +- `projects/nextcloud.md` +- `projects/krasnogorsk/README.md` +- `projects/dttb/agentdvr-home.md` +- `projects/dttb/mailcow-dttb.md` +- `projects/dttb/netbird-inventory.md` +- `projects/dttb/gitea.md` +- `projects/dttb/video-surveillance-report.md` +- `projects/dttb/network-topology.md` +- `projects/dttb/server1c.md` +- ... 
+60 ещё + +## TODO без даты (48 шт в 12 файлах) + +- `projects/niikn/changelog.md` — 12 шт +- `projects/niikn/NIIKN-ChangeLog.md` — 6 шт +- `projects/niikn/matrix.md` — 5 шт +- `projects/dttb/nextcloud-talk-bot/README.md` — 5 шт +- `daily/2026-04-19.md` — 5 шт +- `claude-memory/mas-niikn.md` — 5 шт +- `decisions/2026-04-16-unifi-migration-peredelki.md` — 4 шт +- `projects/niikn/README.md` — 2 шт +- `decisions/2026-04-14-openclaw-claude-code-pipeline.md` — 1 шт +- `projects/dttb/mailcow-dttb.md` — 1 шт +- `daily/2026-04-17.md` — 1 шт +- `templates/daily-note.md` — 1 шт + +## Дубликаты имён (3) + +- `README.md`: + - `README.md` + - `decisions/README.md` + - `notes/README.md` + - `projects/krasnogorsk/README.md` + - `projects/dttb/README.md` + - `projects/glavtorg/README.md` + - `projects/niikn/README.md` + - `projects/dttb/nextcloud-talk-bot/README.md` + - `snippets/README.md` + - `scripts/README.md` +- `nextcloud.md`: + - `projects/nextcloud.md` + - `projects/dttb/nextcloud.md` +- `proxmox-inventory.md`: + - `projects/dttb/proxmox-inventory.md` + - `projects/mmfb/proxmox-inventory.md` + +--- +*Генерируется `scripts/kb-health.py`. 
#!/usr/bin/env python3
"""
kb-health — an objective quality metric for the knowledge base
(karpathy-style val_bpb).

Penalties are summed into a single score. Lower is better.

Categories:
  - broken_wikilinks      [[foo]] does not resolve to any file
  - broken_paths          [text](./foo.md) — the path does not exist
  - missing_frontmatter   .md file without a leading `---` ... `---` block
  - orphan_files          no incoming wikilinks, and not in a system folder
  - undated_todos         "- [ ] ..." with no YYYY-MM-DD on the same line
  - duplicate_basenames   one basename shared by files in different folders

Weights (score = sum of count * weight):
  broken_wikilinks     * 10
  broken_paths         * 10
  missing_frontmatter  * 3
  orphan_files         * 2
  undated_todos        * 1
  duplicate_basenames  * 5

Output:
  audit/YYYY-MM-DD-health.md   — human-readable report
  audit/health-latest.json     — JSON for kb-agent-loop.sh (score comparison
                                 between runs)
"""

import json
import re
import sys
from collections import defaultdict
from datetime import date
from pathlib import Path
from urllib.parse import unquote

VAULT = Path(__file__).resolve().parent.parent
OUT_DIR = VAULT / "audit"

# Folders / root files that are scanned.
INCLUDE_DIRS = ["decisions", "notes", "projects", "snippets", "daily",
                "claude-memory", "templates", "scripts", "audit"]
INCLUDE_ROOT_FILES = ["CLAUDE.md", "README.md"]

# Folders that are skipped entirely.
EXCLUDE_DIRS = {".git", ".obsidian", ".claude"}

# Files for which having no backlinks is normal (system / service files).
ORPHAN_OK_PATTERNS = [
    re.compile(r"^daily/.*"),          # daily notes are rarely backlinked
    re.compile(r"^audit/.*"),          # audit reports
    re.compile(r"^templates/.*"),      # templates
    re.compile(r"^notes/claude/.*"),   # auto-saved Claude sessions
    re.compile(r"^scripts/.*"),        # scripts
    re.compile(r"^CLAUDE\.md$"),
    re.compile(r"^README\.md$"),
]

WEIGHTS = {
    "broken_wikilinks": 10,
    "broken_paths": 10,
    "missing_frontmatter": 3,
    "orphan_files": 2,
    "undated_todos": 1,
    "duplicate_basenames": 5,
}

# Patterns are compiled once; they are applied to every scanned file.
_FENCED_CODE_RE = re.compile(r"```[\s\S]*?```")
_INLINE_CODE_RE = re.compile(r"`[^`\n]*`")
_WIKILINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
_MD_LINK_RE = re.compile(r"\]\(([^)]+?\.md)(?:#[^)]*)?\)")
_URL_SCHEME_RE = re.compile(r"^[A-Za-z][A-Za-z0-9+.\-]*:")
_OPEN_TODO_RE = re.compile(r"^\s*-\s*\[\s*\]\s+")
_ISO_DATE_RE = re.compile(r"\d{4}-\d{2}-\d{2}")


def collect_md_files():
    """Return a list[Path] of every .md file in scope.

    Root files come first (in INCLUDE_ROOT_FILES order), followed by the
    contents of each INCLUDE_DIRS folder in sorted order, so that repeated
    runs over the same tree produce the same report.
    """
    files = []
    for name in INCLUDE_ROOT_FILES:
        p = VAULT / name
        if p.is_file():
            files.append(p)
    for dname in INCLUDE_DIRS:
        root = VAULT / dname
        if not root.exists():
            continue
        for p in sorted(root.rglob("*.md")):
            # Test only the parts *inside* the vault: the vault itself may
            # live under a directory named like an excluded one (~/.claude).
            if any(part in EXCLUDE_DIRS for part in p.relative_to(VAULT).parts):
                continue
            # rglob also yields directories whose name ends in ".md".
            if not p.is_file():
                continue
            files.append(p)
    return files


def rel(p: Path) -> str:
    """Return *p* relative to the vault root, always with '/' separators."""
    return p.relative_to(VAULT).as_posix()


def has_frontmatter(text: str) -> bool:
    """Return True if *text* starts with a `---` ... `---` frontmatter block.

    The opening `---` must be the very first line (a leading BOM is
    ignored). The closing `---` may be the last line of the file with no
    trailing newline, and CRLF line endings are accepted.
    """
    if text.startswith("\ufeff"):
        text = text[1:]
    lines = text.splitlines()
    if not lines or lines[0].rstrip() != "---":
        return False
    return any(line.rstrip() == "---" for line in lines[1:])


def strip_code(text: str) -> str:
    """Remove fenced ```...``` blocks and inline `...` spans.

    Used so that link regexes do not pick up code examples.
    """
    without_fences = _FENCED_CODE_RE.sub("", text)
    return _INLINE_CODE_RE.sub("", without_fences)


def extract_wikilinks(text: str):
    """Return list[str] of wikilink targets, without alias or heading part.

    Links inside code spans/blocks are ignored; empty targets are dropped.
    """
    targets = []
    for m in _WIKILINK_RE.finditer(strip_code(text)):
        target = m.group(1).split("|")[0].split("#")[0].strip()
        if target:
            targets.append(target)
    return targets


def extract_md_paths(text: str):
    """Return list[str] of local .md link paths, e.g. [text](./foo.md).

    Any link that starts with a URL scheme (http:, https:, mailto:, ...) is
    skipped. A local file whose name merely begins with "http" is kept.
    A trailing `#anchor` is not part of the returned path.
    """
    paths = []
    for m in _MD_LINK_RE.finditer(strip_code(text)):
        path = m.group(1)
        if _URL_SCHEME_RE.match(path):
            continue
        paths.append(path)
    return paths


def count_undated_todos(text: str) -> int:
    """Count open '- [ ] ...' items with no YYYY-MM-DD on the same line."""
    return sum(
        1
        for line in text.splitlines()
        if _OPEN_TODO_RE.match(line) and not _ISO_DATE_RE.search(line)
    )


def resolve_wikilink(target: str, all_basenames: dict, from_file: Path):
    """Find the file a wikilink target points at. Return a Path or None.

    Strategies, in order:
      1. '../foo/bar' or '/foo/bar' — relative to the source file
      2. 'folder/bar'               — from the vault root
      3. 'bar'                      — by basename in any folder
                                      (Obsidian flat namespace)

    *all_basenames* maps a file stem to the list of files with that stem;
    the first entry wins for strategy 3.
    """
    # Drop only a trailing ".md"; str.replace would also eat ".md" occurring
    # in the middle of a name.
    target_clean = target[:-3] if target.endswith(".md") else target

    if target_clean.startswith((".", "/")):
        try:
            resolved = (from_file.parent / target_clean).resolve()
            # Append ".md" rather than with_suffix(): with_suffix would turn
            # "v1.2-notes" into "v1.md".
            candidates = [resolved.with_name(resolved.name + ".md"), resolved]
            for c in candidates:
                if c.is_file() and str(c).endswith(".md"):
                    return c
        except (OSError, RuntimeError, ValueError):
            # Unresolvable path (symlink loop, empty name, ...): fall through
            # to the remaining strategies.
            pass

    # Path from the vault root. A leading "/" is stripped so that the join
    # stays inside the vault instead of jumping to the filesystem root.
    from_root = target_clean.lstrip("/")
    if from_root:
        guess = VAULT / f"{from_root}.md"
        if guess.is_file():
            return guess

    # Basename only — flat namespace.
    basename = target_clean.rsplit("/", 1)[-1]
    matches = all_basenames.get(basename)
    if matches:
        return matches[0]
    return None


def resolve_md_path(path: str, from_file: Path):
    """Resolve a markdown link *path* relative to *from_file*.

    The literal path is tried first, then its percent-decoded form
    (`my%20file.md` -> `my file.md`). Returns the resolved Path, or None if
    neither exists.
    """
    attempts = [path]
    decoded = unquote(path)
    if decoded != path:
        attempts.append(decoded)
    for candidate in attempts:
        try:
            resolved = (from_file.parent / candidate).resolve()
            if resolved.is_file():
                return resolved
        except (OSError, RuntimeError, ValueError):
            continue
    return None


def is_orphan_ok(relpath: str) -> bool:
    """Return True if *relpath* is in a zone where having no backlinks is fine."""
    return any(pat.match(relpath) for pat in ORPHAN_OK_PATTERNS)


def _scan(files):
    """Read every file and collect the raw findings for all categories."""
    # Basename index, used for wikilink resolution and duplicate detection.
    basenames = defaultdict(list)
    for p in files:
        basenames[p.stem].append(p)

    broken_wl = []                # (file, target)
    broken_paths = []             # (file, path)
    missing_fm = []               # file
    undated_todos_per_file = {}   # file -> count
    incoming = defaultdict(set)   # resolved target -> files linking to it

    for f in files:
        text = f.read_text(encoding="utf-8", errors="ignore")
        rel_f = rel(f)
        f_key = f.resolve()

        # The root CLAUDE.md and every README.md are service files;
        # frontmatter is not required there.
        if rel_f != "CLAUDE.md" and f.name != "README.md" and not has_frontmatter(text):
            missing_fm.append(rel_f)

        for target in extract_wikilinks(text):
            resolved = resolve_wikilink(target, basenames, f)
            if resolved is None:
                broken_wl.append((rel_f, target))
                continue
            # Normalise so that "a/../b.md" and "b.md" are the same key.
            target_key = resolved.resolve()
            # A file linking to itself is not a backlink.
            if target_key != f_key:
                incoming[target_key].add(f)

        for path in extract_md_paths(text):
            if resolve_md_path(path, f) is None:
                broken_paths.append((rel_f, path))

        n_todos = count_undated_todos(text)
        if n_todos > 0:
            undated_todos_per_file[rel_f] = n_todos

    # Orphan = no incoming links and not in an OK zone.
    orphans = []
    for f in files:
        if f.resolve() not in incoming:
            rel_f = rel(f)
            if not is_orphan_ok(rel_f):
                orphans.append(rel_f)

    duplicate_basenames = [
        (name, [rel(p) for p in paths])
        for name, paths in basenames.items()
        if len(paths) > 1
    ]

    return {
        "broken_wl": broken_wl,
        "broken_paths": broken_paths,
        "missing_fm": missing_fm,
        "orphans": orphans,
        "undated_todos_per_file": undated_todos_per_file,
        "duplicate_basenames": duplicate_basenames,
    }


def _render_report(today, score, counts, n_files, findings):
    """Build the Markdown report and return it as a list of lines."""
    lines = [
        "---",
        f"date: {today}",
        "type: audit",
        "source: kb-health.py",
        f"score: {score}",
        "tags: [audit, health, metric]",
        "---",
        "",
        f"# KB health — {today}",
        "",
        f"**Score (меньше = лучше): `{score}`**",
        f"Проверено файлов: {n_files}",
        "",
        "## Разбивка",
        "",
        "| Категория | Кол-во | Вес | Штраф |",
        "|---|---:|---:|---:|",
    ]
    for k in WEIGHTS:
        c = counts[k]
        w = WEIGHTS[k]
        lines.append(f"| {k} | {c} | {w} | {c * w} |")
    lines += ["| **ИТОГО** | | | **" + str(score) + "** |", ""]

    broken_wl = findings["broken_wl"]
    if broken_wl:
        lines += ["## Битые wikilinks", ""]
        lines += ["| Откуда | `[[таргет]]` |", "|---|---|"]
        for fr, tg in broken_wl[:50]:
            lines.append(f"| `{fr}` | `[[{tg}]]` |")
        if len(broken_wl) > 50:
            lines.append(f"| ... | +{len(broken_wl)-50} ещё |")
        lines.append("")

    broken_paths = findings["broken_paths"]
    if broken_paths:
        lines += ["## Битые relative-пути", ""]
        lines += ["| Откуда | Путь |", "|---|---|"]
        for fr, pt in broken_paths[:50]:
            lines.append(f"| `{fr}` | `{pt}` |")
        if len(broken_paths) > 50:
            lines.append(f"| ... | +{len(broken_paths)-50} ещё |")
        lines.append("")

    missing_fm = findings["missing_fm"]
    if missing_fm:
        lines += [f"## Без frontmatter ({len(missing_fm)})", ""]
        for f in missing_fm[:30]:
            lines.append(f"- `{f}`")
        if len(missing_fm) > 30:
            lines.append(f"- ... +{len(missing_fm)-30} ещё")
        lines.append("")

    orphans = findings["orphans"]
    if orphans:
        lines += [f"## Orphan — без бэклинков ({len(orphans)})", "",
                  "_Эти файлы никто не упоминает через `[[..]]`. Кандидаты на удаление или добавление ссылок._", ""]
        for f in orphans[:30]:
            lines.append(f"- `{f}`")
        if len(orphans) > 30:
            lines.append(f"- ... +{len(orphans)-30} ещё")
        lines.append("")

    todos = findings["undated_todos_per_file"]
    if todos:
        lines += [f"## TODO без даты ({sum(todos.values())} шт в {len(todos)} файлах)", ""]
        for f, n in sorted(todos.items(), key=lambda x: -x[1])[:20]:
            lines.append(f"- `{f}` — {n} шт")
        # Say so when the list is cut, like the sections above do.
        if len(todos) > 20:
            lines.append(f"- ... +{len(todos)-20} ещё")
        lines.append("")

    duplicate_basenames = findings["duplicate_basenames"]
    if duplicate_basenames:
        lines += [f"## Дубликаты имён ({len(duplicate_basenames)})", ""]
        for name, paths in duplicate_basenames[:20]:
            lines.append(f"- `{name}.md`:")
            for p in paths:
                lines.append(f"  - `{p}`")
        if len(duplicate_basenames) > 20:
            lines.append(f"- ... +{len(duplicate_basenames)-20} ещё")
        lines.append("")

    lines += [
        "---",
        "*Генерируется `scripts/kb-health.py`. JSON-версия в `audit/health-latest.json` для agent-loop.*",
    ]
    return lines


def main():
    """Scan the vault, compute the score and write both report files.

    Writes `audit/health-latest.json` and `audit/<today>-health.md`, then
    prints a short summary. Exits with status 1 if no .md files are found.
    """
    today = date.today().isoformat()
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    files = collect_md_files()
    if not files:
        print("no md files found", file=sys.stderr)
        sys.exit(1)

    findings = _scan(files)

    counts = {
        "broken_wikilinks": len(findings["broken_wl"]),
        "broken_paths": len(findings["broken_paths"]),
        "missing_frontmatter": len(findings["missing_fm"]),
        "orphan_files": len(findings["orphans"]),
        "undated_todos": sum(findings["undated_todos_per_file"].values()),
        "duplicate_basenames": len(findings["duplicate_basenames"]),
    }
    score = sum(counts[k] * WEIGHTS[k] for k in counts)

    # JSON snapshot. The encoding is explicit because the output must not
    # depend on the locale of the machine running the script.
    latest = {
        "date": today,
        "score": score,
        "counts": counts,
        "weights": WEIGHTS,
        "files_scanned": len(files),
    }
    (OUT_DIR / "health-latest.json").write_text(
        json.dumps(latest, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )

    # Markdown report (contains Cyrillic text, hence the explicit UTF-8).
    lines = _render_report(today, score, counts, len(files), findings)
    out = OUT_DIR / f"{today}-health.md"
    out.write_text("\n".join(lines) + "\n", encoding="utf-8")

    print(f"health report: {out}")
    print(f"  score: {score}")
    for k, v in counts.items():
        print(f"  {k}: {v}")


if __name__ == "__main__":
    main()