#!/usr/bin/env python3
"""
kb-audit-creds — collect every URL from credentials.md, ping each one and
record the ones that are unreachable or answer with a non-standard status.

Writes audit/YYYY-MM-DD-creds-drift.md.
Only reachability (HTTP status) is checked; no real login is attempted.
"""
import re
import socket
import ssl
import sys
import urllib.error
import urllib.request
from datetime import date
from pathlib import Path

VAULT = Path(__file__).resolve().parent.parent
CREDS = VAULT / "projects/dttb/credentials.md"
OUT_DIR = VAULT / "audit"

# Certificate and hostname verification are deliberately disabled for every
# request made by this script.
# NOTE(review): presumably so hosts with self-signed certificates still count
# as reachable — confirm this is acceptable for all audited hosts.
_CTX = ssl.create_default_context()
_CTX.check_hostname = False
_CTX.verify_mode = ssl.CERT_NONE

# URL in plain text or inside a markdown table cell.
_URL_RE = re.compile(r"https?://[a-zA-Z0-9._:@-]+(?:/[^\s`|<>\"')]*)?")
# URL whose authority part carries userinfo (http://user:pass@host).
_EMBEDDED_CREDS_RE = re.compile(r"https?://[^/]*@")
# Userinfo of any URL; used to mask credentials before cutting context snippets.
_USERINFO_RE = re.compile(r"(https?://)([^/\s]*)@")


def _mask_userinfo(text: str) -> str:
    """Replace URL userinfo with '*' of the same length so offsets are preserved."""
    return _USERINFO_RE.sub(
        lambda m: m.group(1) + "*" * len(m.group(2)) + "@", text
    )


def extract_urls(text: str) -> dict[str, str]:
    """Return ``{url: context}`` for every distinct URL found in *text*.

    URLs are matched both in running text and in markdown tables; trailing
    punctuation (``/ . , ; : )``) is stripped. URLs with embedded credentials
    (``http://user:pass@host``) are skipped — they are meant for git/curl, not
    for a reachability check.

    The context is up to 80 characters of text surrounding the first
    occurrence of the URL (30 characters on each side of the match), with
    newlines and ``|`` replaced by spaces. Userinfo of any URL that falls
    inside the snippet is masked so credentials are not copied into the report.

    NOTE(review): plain secrets that sit next to a URL in credentials.md
    (e.g. a password in an adjacent table cell) can still end up in the
    context snippet — confirm that the audit report is stored as safely as
    credentials.md itself.
    """
    # Same length as `text`, so match offsets are valid in both strings.
    masked = _mask_userinfo(text)
    seen: dict[str, str] = {}
    for m in _URL_RE.finditer(text):
        url = m.group(0).rstrip("/.,;:)")
        if _EMBEDDED_CREDS_RE.match(url):
            continue
        if url in seen:
            continue
        start = max(0, m.start() - 30)
        end = min(len(text), m.end() + 30)
        ctx = masked[start:end].replace("\n", " ").replace("|", " ").strip()
        seen[url] = ctx[:80]
    return seen


def _request(url: str, method: str, timeout: int) -> tuple[str, str]:
    """Send one request and return ``(status, reason)`` of the response.

    Raises whatever ``urllib.request.urlopen`` raises (``HTTPError`` for
    non-2xx answers, ``URLError`` for connection problems, ...).
    """
    req = urllib.request.Request(
        url, method=method, headers={"User-Agent": "kb-audit/1.0"}
    )
    # The context manager closes the response (and its socket) deterministically.
    with urllib.request.urlopen(req, context=_CTX, timeout=timeout) as resp:
        return str(resp.status), resp.reason or ""


def _attempt(url: str, method: str, timeout: int) -> tuple[str, str]:
    """Run one request and translate every failure into ``(status, detail)``.

    Status is the HTTP code as a string, or one of ``TIMEOUT``, ``SSL``,
    ``FAIL`` (connection-level error) and ``ERR`` (anything else).
    """
    try:
        return _request(url, method, timeout)
    except urllib.error.HTTPError as e:
        status, detail = str(e.code), e.reason or ""
        e.close()  # the error object wraps the open HTTP response
        return status, detail
    except urllib.error.URLError as e:
        reason = e.reason
        detail = str(reason)
        lowered = detail.lower()
        # Timeouts while connecting/sending arrive wrapped in URLError.
        if isinstance(reason, (TimeoutError, socket.timeout)):
            return "TIMEOUT", ""
        if (
            isinstance(reason, ssl.SSLError)
            or "ssl" in lowered
            or "certificate" in lowered
        ):
            return "SSL", detail[:60]
        return "FAIL", detail[:60]
    except (TimeoutError, socket.timeout):
        # Timeouts while reading the response are raised unwrapped;
        # socket.timeout is a separate class before Python 3.10.
        return "TIMEOUT", ""
    except Exception as e:  # boundary: a single bad URL must not abort the audit
        return "ERR", str(e)[:60]


def ping(url: str, timeout: int = 6) -> tuple[str, str]:
    """Return ``(status, detail)`` for *url*.

    Tries HEAD first; when the server answers 405 or 501 the request is
    repeated with GET and the result of the GET attempt is returned.
    """
    status, detail = _attempt(url, "HEAD", timeout)
    if status in ("405", "501"):
        return _attempt(url, "GET", timeout)
    return status, detail


def classify(status: str) -> str:
    """Map a status returned by :func:`ping` to a report category."""
    # Any 2xx is a successful answer (urlopen returns all of them without
    # raising), not only 200.
    if status.isdigit() and 200 <= int(status) < 300:
        return "✓ reachable"
    if status in ("301", "302", "303", "307", "308"):
        return "✓ reachable"
    if status in ("401", "403"):
        return "✓ auth-required (сервер жив)"
    if status in ("404", "405"):
        # 405 for the method, 404 is fine when pinging a host root
        return "⚠ 4xx (сервер жив, но путь/метод)"
    if status.startswith("5"):
        return "❌ 5xx server error"
    if status in ("FAIL", "ERR", "TIMEOUT", "SSL"):
        return "❌ недоступен"
    return f"? {status}"


def _cell(value: str) -> str:
    """Make *value* safe to embed in a single markdown table cell."""
    return value.replace("|", "\\|").replace("\n", " ")


def main():
    """Ping every URL from credentials.md and write the dated audit report.

    Exits with status 1 when credentials.md cannot be read or contains no URLs.
    """
    today = date.today().isoformat()

    # Explicit UTF-8: the file holds non-ASCII text and the locale default
    # encoding is not guaranteed to be UTF-8.
    try:
        text = CREDS.read_text(encoding="utf-8")
    except OSError as exc:
        print(f"cannot read {CREDS}: {exc}", file=sys.stderr)
        sys.exit(1)

    urls = extract_urls(text)
    if not urls:
        print("no URLs found in credentials.md", file=sys.stderr)
        sys.exit(1)

    results = []
    for url, ctx in urls.items():
        status, detail = ping(url)
        results.append((url, ctx, status, detail, classify(status)))

    # Grouping relies on the marker character at the start of each category.
    unreachable = [r for r in results if "❌" in r[4]]
    questionable = [r for r in results if "⚠" in r[4] or "?" in r[4]]
    ok = [r for r in results if "✓" in r[4]]

    lines = [
        "---",
        f"date: {today}",
        "type: audit",
        "source: kb-audit-creds.py",
        "tags: [audit, creds, reachability]",
        "---",
        "",
        f"# Credentials reachability — {today}",
        "",
        "Ping-проверка URL из [[../projects/dttb/credentials|credentials.md]].",
        "Проверяется только reachability (HTTP status), не реальный логин.",
        "",
        f"- Всего URL: **{len(results)}**",
        f"- ✓ Reachable: {len(ok)} / ⚠ Questionable: {len(questionable)} / ❌ Unreachable: {len(unreachable)}",
        "",
    ]

    if unreachable:
        lines += [
            "## ❌ Недоступные (проверить: сервер упал? URL поменялся?)",
            "",
            "| URL | Status | Detail | Контекст |",
            "|---|---|---|---|",
        ]
        for url, ctx, st, det, _cat in unreachable:
            lines.append(f"| `{url}` | {st} | {_cell(det[:40])} | {ctx[:50]} |")
        lines.append("")

    if questionable:
        lines += [
            "## ⚠ Нестандартный ответ",
            "",
            "| URL | Status | Detail |",
            "|---|---|---|",
        ]
        for url, _ctx, st, det, _cat in questionable:
            lines.append(f"| `{url}` | {st} | {_cell(det[:40])} |")
        lines.append("")

    lines += [
        "## ✓ Все ответили нормально",
        "",
        "| URL | Status | Категория |",
        "|---|---|---|",
    ]
    for url, _ctx, st, _det, cat in ok:
        lines.append(f"| `{url}` | {st} | {cat} |")

    lines += ["", "---", "*Автоматически через `scripts/kb-audit-creds.py`.*"]

    OUT_DIR.mkdir(parents=True, exist_ok=True)
    out = OUT_DIR / f"{today}-creds-drift.md"
    out.write_text("\n".join(lines), encoding="utf-8")
    print(f"creds drift: {out}")
    print(f"  ok: {len(ok)} / questionable: {len(questionable)} / unreachable: {len(unreachable)}")


if __name__ == "__main__":
    main()