#!/usr/bin/env python3
"""Incremental sync of a vault into an Open WebUI Knowledge collection.

Only the folders listed in INCLUDE_DIRS are synced, and files whose path looks
like it holds secrets are excluded.  The sync is idempotent (change detection
by md5 of the file content).  It is resilient: empty files and errors on
individual files do not abort the whole run.

Run by cron on LXC 142.  Logs: /opt/owui-kb-sync/sync.log
"""
import hashlib
import json
import os
import re
import sys
import urllib.error
import urllib.request
import uuid

BASE = "http://localhost:3000"
# NOTE(security): credentials should not live in source control.  They can be
# supplied through the OWUI_EMAIL / OWUI_PASSWORD environment variables; the
# literals below remain only as a fallback so existing cron entries keep
# working.  Rotate the password and drop the fallback once the environment is
# configured.
EMAIL = os.environ.get("OWUI_EMAIL", "it5870@yandex.ru")
PASSWORD = os.environ.get("OWUI_PASSWORD", "1qaz!QAZ")
KB_ID = "7f60313d-add9-4f99-ad53-89e792295129"
ROOT = "/opt/owui-kb-sync/kb"
MANIFEST = "/opt/owui-kb-sync/manifest.json"
INCLUDE_DIRS = ("projects", "decisions", "claude-memory", "snippets")
# Substrings matched against the lower-cased, "/"-prefixed relative path.
EXCLUDE_SUBSTR = ("credential", "secret", ".env", "/password")
# Leading YAML front matter: tolerates CRLF line endings, an empty block and a
# closing "---" that is the very last thing in the file (no trailing newline).
_FM = re.compile(r"\A---[ \t]*\r?\n(?:.*?\r?\n)?---[ \t]*(?:\r?\n|\Z)", re.S)


def _req(path, data=None, token=None, method=None, raw=None,
         ctype="application/json", timeout=180):
    """Send one HTTP request to the Open WebUI API and decode the JSON reply.

    Args:
        path: URL path appended to BASE.
        data: Object serialised as the JSON request body (ignored if *raw*
            is given).
        token: Bearer token; no Authorization header is sent when falsy.
        method: HTTP method; defaults to POST when a body is sent, else GET.
        raw: Pre-encoded request body bytes, sent as-is.
        ctype: Content-Type header used when a body is sent.
        timeout: Socket timeout in seconds.

    Returns:
        The decoded JSON response, or None when the response body is empty.

    Raises:
        urllib.error.HTTPError / urllib.error.URLError: on HTTP or transport
            failures.
        json.JSONDecodeError: if a non-empty response is not valid JSON.
    """
    hdr = {}
    if token:
        hdr["Authorization"] = "Bearer " + token
    if raw is not None:
        body, hdr["Content-Type"] = raw, ctype
    elif data is not None:
        body, hdr["Content-Type"] = json.dumps(data).encode(), ctype
    else:
        body = None
    if method is None:
        # Decide on the presence of a body, not on its truthiness.
        method = "GET" if body is None else "POST"
    request = urllib.request.Request(BASE + path, data=body, headers=hdr,
                                     method=method)
    with urllib.request.urlopen(request, timeout=timeout) as resp:
        payload = resp.read()
    # An empty response body is not an error; report it as None.
    return json.loads(payload.decode()) if payload.strip() else None


def login():
    """Sign in with EMAIL / PASSWORD and return the API bearer token."""
    return _req("/api/v1/auths/signin",
                {"email": EMAIL, "password": PASSWORD})["token"]


def has_text(data):
    """Return True if *data* holds meaningful text besides YAML front matter.

    The bytes are decoded as UTF-8 (undecodable bytes and a leading BOM are
    dropped), a leading front-matter block is removed, and the remainder must
    contain at least 5 non-whitespace-trimmed characters.
    """
    try:
        text = data.decode("utf-8-sig", "ignore")
    except Exception:
        # Not a bytes-like object: treat as "no text" rather than crash.
        return False
    text = _FM.sub("", text).strip()
    return len(text) >= 5


def upload_file(token, relpath, content_bytes):
    """Upload one markdown file via multipart/form-data.

    Args:
        token: Bearer token.
        relpath: Path relative to ROOT; sent as the multipart filename.
        content_bytes: Raw file content.

    Returns:
        The decoded JSON response of POST /api/v1/files/ (main() reads its
        "id" field).
    """
    boundary = "----owui" + uuid.uuid4().hex
    # A raw quote or line break would terminate the quoted filename parameter
    # and corrupt the part header, so percent-escape them (the escaping used
    # by the WHATWG multipart/form-data encoding algorithm).
    filename = (relpath.replace("\r", "%0D")
                       .replace("\n", "%0A")
                       .replace('"', "%22"))
    body = b"".join([
        f"--{boundary}\r\n".encode(),
        f'Content-Disposition: form-data; name="file"; '
        f'filename="{filename}"\r\n'.encode(),
        b"Content-Type: text/markdown\r\n\r\n",
        content_bytes,
        f"\r\n--{boundary}--\r\n".encode(),
    ])
    return _req("/api/v1/files/", raw=body, token=token,
                ctype=f"multipart/form-data; boundary={boundary}")


def kb_add(token, file_id):
    """Attach an uploaded file to the knowledge collection KB_ID."""
    return _req(f"/api/v1/knowledge/{KB_ID}/file/add", {"file_id": file_id},
                token=token)


def safe(fn, *a, **kw):
    """Call ``fn(*a, **kw)`` best-effort: print a warning instead of raising."""
    try:
        fn(*a, **kw)
    except Exception as e:
        print("  warn:", e)


def kb_remove(token, file_id):
    """Best-effort detach of *file_id* from the collection; no-op if falsy."""
    if not file_id:
        return
    safe(_req, f"/api/v1/knowledge/{KB_ID}/file/remove",
         {"file_id": file_id}, token=token)


def file_delete(token, file_id):
    """Best-effort deletion of an uploaded file; no-op if *file_id* is falsy."""
    if not file_id:
        return
    safe(_req, f"/api/v1/files/{file_id}", token=token, method="DELETE")


def wanted(relpath):
    """Return True if *relpath* (relative to ROOT) should be synced.

    A file qualifies when its top-level directory is one of INCLUDE_DIRS, its
    name ends with ".md", and its lower-cased path contains none of the
    EXCLUDE_SUBSTR markers.
    """
    norm = relpath.replace(os.sep, "/")
    top, sep, _ = norm.partition("/")
    # Compare the whole first path component: a plain prefix test would also
    # accept siblings such as "projects-old/" or a root file "projects.md".
    if not sep or top not in INCLUDE_DIRS:
        return False
    if not norm.endswith(".md"):
        return False
    low = "/" + norm.lower()
    return not any(s in low for s in EXCLUDE_SUBSTR)


def _load_manifest():
    """Read the manifest (relpath -> {"hash", "file_id"}); {} if it is absent."""
    if not os.path.exists(MANIFEST):
        return {}
    with open(MANIFEST, encoding="utf-8") as fh:
        return json.load(fh)


def _save_manifest(manifest):
    """Write the manifest atomically so an interrupted run cannot truncate it."""
    tmp = MANIFEST + ".tmp"
    with open(tmp, "w", encoding="utf-8") as fh:
        json.dump(manifest, fh, ensure_ascii=False)
    os.replace(tmp, MANIFEST)


def _scan_vault():
    """Collect syncable files under ROOT.

    Returns:
        (current, unreadable, skipped_empty) where *current* maps relpath to
        (md5 hex digest, content bytes), *unreadable* is the set of wanted
        relpaths that could not be read, and *skipped_empty* counts wanted
        files without meaningful text.
    """
    current, unreadable, skipped_empty = {}, set(), 0
    for dirpath, _, files in os.walk(ROOT):
        for name in files:
            full = os.path.join(dirpath, name)
            rel = os.path.relpath(full, ROOT)
            if not wanted(rel):
                continue
            try:
                with open(full, "rb") as fh:
                    data = fh.read()
            except OSError as e:
                # One unreadable file must not abort the whole run.
                unreadable.add(rel)
                print(f"  ERR {rel}: {e}")
                continue
            if not has_text(data):
                skipped_empty += 1
                continue
            current[rel] = (hashlib.md5(data).hexdigest(), data)
    return current, unreadable, skipped_empty


def main():
    """Run one sync pass: upload new/changed files, drop vanished ones."""
    token = login()
    manifest = _load_manifest()
    current, unreadable, skipped_empty = _scan_vault()
    added = changed = removed = 0
    errors = len(unreadable)
    try:
        for rel, (h, data) in current.items():
            old = manifest.get(rel)
            if old and old.get("hash") == h:
                continue
            try:
                if old:
                    kb_remove(token, old.get("file_id"))
                    file_delete(token, old.get("file_id"))
                    # The old copy is gone now; forget its hash so a failed
                    # upload is retried even if the file is later reverted.
                    manifest[rel] = {**old, "hash": None}
                fid = upload_file(token, rel, data)["id"]
                try:
                    kb_add(token, fid)
                except Exception:
                    # Do not leave an orphaned upload behind.
                    file_delete(token, fid)
                    raise
                manifest[rel] = {"hash": h, "file_id": fid}
                if old:
                    changed += 1
                else:
                    added += 1
            except urllib.error.HTTPError as e:
                errors += 1
                if e.code == 400:
                    # Duplicate/empty content: a twin is already in the
                    # collection, so do not retry on later runs.
                    manifest[rel] = {"hash": h, "file_id": None, "skip": True}
                print(f"  ERR {rel}: HTTP {e.code}")
            except Exception as e:
                errors += 1
                print(f"  ERR {rel}: {e}")
        for rel in list(manifest):
            # A file that merely failed to read has not vanished: keep it.
            if rel in current or rel in unreadable:
                continue
            kb_remove(token, manifest[rel].get("file_id"))
            file_delete(token, manifest[rel].get("file_id"))
            del manifest[rel]
            removed += 1
    finally:
        # Persist whatever was achieved, even if the run is interrupted.
        _save_manifest(manifest)
    print(f"sync done: +{added} ~{changed} -{removed} | пропущено пустых: {skipped_empty} | "
          f"ошибок: {errors} | в коллекции: {len(manifest)}")


if __name__ == "__main__":
    main()