# maubot-plugins/wiki/wiki.py

import asyncio
import difflib
import re
import time
from typing import Optional

from maubot import MessageEvent, Plugin
from maubot.handlers import command
from mautrix.util.config import BaseProxyConfig, ConfigUpdateHelper


class Config(BaseProxyConfig):
    """Plugin configuration; declares which keys are carried over on update."""

    def do_update(self, helper: ConfigUpdateHelper) -> None:
        """Copy each supported setting through *helper*, one key at a time."""
        carried_keys = (
            "docs_url",
            "index_path",
            "refresh_minutes",
            "max_results",
            "snippet_chars",
            "site_name",
        )
        for key in carried_keys:
            helper.copy(key)


# Matches a message that starts with "!ask", "!doc" or "!wiki".  Group 1 is the
# verb; group 2 (optional) is everything after the first run of whitespace.
# DOTALL lets the argument text span several lines.
_CMD_RE = re.compile(
    r"^!(ask|doc|wiki)(?:[ \t\r\n]+(.*))?$",
    flags=re.DOTALL,
)

# A token is a run of characters matching [a-z0-9], case-insensitively.
_TOKEN_RE = re.compile(r"[a-z0-9]+", flags=re.IGNORECASE)

# Words removed from queries by _tokens() before searching (alphabetical).
_STOPWORDS = frozenset(
    (
        "a", "an", "and", "are", "as", "at",
        "be", "by",
        "did", "do", "does",
        "for", "from",
        "how",
        "i", "in", "is", "it",
        "me", "my",
        "of", "on", "or", "our",
        "than", "that", "the", "this", "to",
        "was", "we", "what", "when", "where", "which", "who", "why", "with",
        "you",
    )
)


def _tokens(s: str) -> list[str]:
    """Split *s* into lower-cased tokens, leaving out any that are stopwords."""
    kept = []
    for raw in _TOKEN_RE.findall(s):
        word = raw.lower()
        if word in _STOPWORDS:
            continue
        kept.append(word)
    return kept


def _make_url(base: str, slug: str) -> str:
    """Join *base* and *slug* with exactly one ``/`` between them.

    Trailing slashes on *base* and leading slashes on *slug* are dropped
    before joining; nothing else is normalised.
    """
    root = base.rstrip("/")
    path = slug.lstrip("/")
    return root + "/" + path


def _snippet(content: str, terms: list[str], width: int) -> str:
    """Return a single-line excerpt of *content* at most about *width* chars long.

    The excerpt window starts half a *width* before the earliest occurrence
    of any of *terms* (searched as substrings of the lower-cased content, so
    *terms* are expected to be lower-case).  Whole-word occurrences of that
    earliest term are wrapped in ``**`` (Markdown bold).  When none of the
    terms occurs, the first *width* characters are used instead.

    Newlines are replaced by spaces in both cases so the result can be placed
    inside a one-line Markdown list item.  A "…" is added on each side where
    text was cut off.  Returns "" for empty *content*.
    """
    if not content:
        return ""
    lc = content.lower()
    first = -1
    matched_term = None
    for t in terms:
        idx = lc.find(t)
        if idx != -1 and (first == -1 or idx < first):
            first = idx
            matched_term = t
    if first == -1:
        # Flatten newlines here as well: the matched path below already does,
        # and an embedded newline would break the caller's list formatting.
        head = content[:width].strip().replace("\n", " ")
        return head + ("…" if len(content) > width else "")
    start = max(0, first - width // 2)
    end = min(len(content), start + width)
    chunk = content[start:end].strip().replace("\n", " ")
    if matched_term:
        # The term was located as a substring, but only whole-word hits are
        # emphasised, so a hit inside a longer word is left unmarked.
        chunk = re.sub(
            rf"(?i)\b({re.escape(matched_term)})\b",
            r"**\1**",
            chunk,
        )
    prefix = "…" if start > 0 else ""
    suffix = "…" if end < len(content) else ""
    return f"{prefix}{chunk}{suffix}"


class WikiBot(Plugin):
    """Maubot plugin answering ``!ask``, ``!doc`` and ``!wiki`` from a docs index.

    The index is a JSON document fetched from ``docs_url`` + ``index_path``.
    A background task re-downloads it every ``refresh_minutes`` and the
    commands are served from the in-memory copy.
    """

    # Used when ``refresh_minutes`` is missing, non-numeric or not positive.
    _FALLBACK_REFRESH_MINUTES = 60

    config: Config
    # slug -> {"title": str, "content": str, "tags": <as given, or []>}
    _index: dict
    # (slug, lower-cased title) pairs in index order, for fuzzy title lookup.
    _slug_titles: list[tuple[str, str]]
    # time.time() of the last successful refresh; 0.0 means "never".
    _last_refresh: float
    # Background refresh task; stays None until start() has created it, so
    # stop() is safe even if start() never completed.
    _refresh_task: Optional[asyncio.Task] = None

    @classmethod
    def get_config_class(cls):
        """Return the configuration class used by this plugin."""
        return Config

    async def start(self) -> None:
        """Load the config, reset the cache and launch the refresh task."""
        await super().start()
        self.config.load_and_update()
        self._index = {}
        self._slug_titles = []
        self._last_refresh = 0.0
        self._refresh_task = asyncio.create_task(self._refresh_loop())

    async def stop(self) -> None:
        """Cancel the background refresh task, if one was started."""
        task, self._refresh_task = self._refresh_task, None
        if task is not None:
            task.cancel()
        await super().stop()

    async def on_external_config_update(self) -> None:
        """Reload the configuration after it was changed externally."""
        self.config.load_and_update()

    def _refresh_interval(self) -> float:
        """Return the pause between refreshes, in seconds.

        Falls back to ``_FALLBACK_REFRESH_MINUTES`` (with a warning) when
        ``refresh_minutes`` is missing, not a number, not finite or not
        positive, so a bad config value can neither crash the refresh loop
        nor turn it into a busy loop.
        """
        raw = None
        try:
            raw = self.config["refresh_minutes"]
            minutes = float(raw)
        except (KeyError, TypeError, ValueError):
            minutes = float("nan")
        # The chained comparison is False for NaN, zero, negatives and inf.
        if not 0 < minutes < float("inf"):
            self.log.warning(
                "invalid refresh_minutes %r; using %d minutes instead",
                raw,
                self._FALLBACK_REFRESH_MINUTES,
            )
            minutes = self._FALLBACK_REFRESH_MINUTES
        return minutes * 60

    async def _refresh_loop(self) -> None:
        """Refresh the index forever, sleeping between attempts.

        A failed refresh is logged and retried after the normal interval.
        Cancellation is not caught here, so it ends the loop.
        """
        while True:
            try:
                await self._refresh()
            except Exception:
                self.log.exception("wiki refresh failed; will retry")
            await asyncio.sleep(self._refresh_interval())

    async def _refresh(self) -> None:
        """Download the index and replace the in-memory cache.

        Raises:
            ValueError: if the downloaded JSON is not an object.
            Exception: whatever the HTTP request or JSON decoding raises.

        The cache is only replaced after the whole index was processed, so a
        failure leaves the previous index in place.
        """
        url = _make_url(self.config["docs_url"], self.config["index_path"])
        async with self.http.get(url) as resp:
            resp.raise_for_status()
            # content_type=None: decode the body whatever Content-Type is sent.
            data = await resp.json(content_type=None)
        if not isinstance(data, dict):
            raise ValueError(
                f"wiki index at {url} is not a JSON object "
                f"(got {type(data).__name__})"
            )
        new_index = {}
        slug_titles = []
        for slug, entry in data.items():
            if not isinstance(entry, dict):
                # One malformed entry should not discard the whole index.
                self.log.warning("wiki refresh: skipping malformed entry %r", slug)
                continue
            # Coerce to str so later .lower()/.strip() calls cannot fail on
            # numeric or otherwise non-string titles and contents.
            title = str(entry.get("title") or slug).strip()
            content = str(entry.get("content") or "")
            new_index[slug] = {
                "title": title,
                "content": content,
                "tags": entry.get("tags") or [],
            }
            slug_titles.append((slug, title.lower()))
        self._index = new_index
        self._slug_titles = slug_titles
        self._last_refresh = time.time()
        self.log.info("wiki refresh: %d docs from %s", len(new_index), url)

    def _search(self, query: str, limit: int) -> list[tuple[float, str, dict]]:
        """Return up to *limit* ``(score, slug, doc)`` hits, best score first.

        Each query token (see ``_tokens``) adds 5 points if it is a substring
        of the title, and 1 point plus 0.1 per occurrence if it is a substring
        of the content.  Documents scoring 0 are omitted; an all-stopword or
        empty query yields no hits.
        """
        terms = _tokens(query)
        if not terms:
            return []
        hits = []
        for slug, doc in self._index.items():
            title_lc = doc["title"].lower()
            content_lc = doc["content"].lower()
            score = 0.0
            for t in terms:
                if t in title_lc:
                    score += 5.0
                if t in content_lc:
                    score += 1.0 + 0.1 * content_lc.count(t)
            if score > 0:
                hits.append((score, slug, doc))
        hits.sort(key=lambda x: x[0], reverse=True)
        return hits[:limit]

    def _lookup(self, query: str) -> tuple[Optional[str], Optional[dict]]:
        """Resolve *query* to a single ``(slug, doc)``, or ``(None, None)``.

        Tried in order: exact slug; case-insensitive exact slug; first slug
        whose lower-cased form ends with ``"/" + query``; closest title
        according to ``difflib`` (cutoff 0.6).
        """
        q = query.strip().lower()
        if not q:
            return None, None
        if q in self._index:
            return q, self._index[q]
        tail = "/" + q
        suffix_hit = None
        for slug in self._index:
            slug_lc = slug.lower()
            if slug_lc == q:
                return slug, self._index[slug]
            # Remember the first suffix match but keep scanning, so an exact
            # (case-insensitive) slug later in the index still wins.
            if suffix_hit is None and slug_lc.endswith(tail):
                suffix_hit = slug
        if suffix_hit is not None:
            return suffix_hit, self._index[suffix_hit]
        candidates = [t for _, t in self._slug_titles]
        match = difflib.get_close_matches(q, candidates, n=1, cutoff=0.6)
        if match:
            for slug, title in self._slug_titles:
                if title == match[0]:
                    return slug, self._index[slug]
        return None, None

    @command.passive(regex=_CMD_RE)
    async def dispatch(self, evt: MessageEvent, match) -> None:
        """Route a matched command message to its handler.

        *match* is indexed like the groups of ``_CMD_RE``: ``match[1]`` is the
        verb and ``match[2]`` the optional argument text.
        """
        verb = match[1].lower()
        body = (match[2] or "").strip()
        if verb == "ask":
            await self._handle_ask(evt, body)
        elif verb == "doc":
            await self._handle_doc(evt, body)
        elif verb == "wiki":
            await self._handle_wiki(evt, body)

    async def _handle_ask(self, evt: MessageEvent, body: str) -> None:
        """Reply with the best-scoring pages for a free-text question."""
        if not body:
            await evt.reply(
                f"Usage: `!ask <question>` — search {self.config['site_name']}."
            )
            return
        if not self._index:
            await evt.reply("Wiki index isn't ready yet; try again in a moment.")
            return
        hits = self._search(body, self.config["max_results"])
        if not hits:
            await evt.reply(f"No matches in {self.config['site_name']} for that.")
            return
        terms = _tokens(body)
        snippet_chars = self.config["snippet_chars"]
        lines = [f"**{self.config['site_name']} — {len(hits)} match(es):**"]
        for _score, slug, doc in hits:
            url = _make_url(self.config["docs_url"], slug)
            snip = _snippet(doc["content"], terms, snippet_chars)
            lines.append(f"- **[{doc['title']}]({url})** — {snip}")
        await evt.reply("\n".join(lines))

    async def _handle_doc(self, evt: MessageEvent, body: str) -> None:
        """Reply with a link to, and a preview of, one specific page."""
        if not body:
            await evt.reply("Usage: `!doc <slug-or-title>` — open a specific page.")
            return
        if not self._index:
            await evt.reply("Wiki index isn't ready yet; try again in a moment.")
            return
        slug, doc = self._lookup(body)
        if slug is None or doc is None:
            await evt.reply(
                f"No page matches `{body}`. Try `!ask {body}` for a fuzzy search."
            )
            return
        url = _make_url(self.config["docs_url"], slug)
        # A single-page preview gets twice the room of a search snippet.
        snippet_chars = self.config["snippet_chars"] * 2
        preview = (doc["content"] or "").strip().replace("\n", " ")
        if len(preview) > snippet_chars:
            preview = preview[:snippet_chars].rstrip() + "…"
        await evt.reply(f"**[{doc['title']}]({url})**\n{preview}")

    async def _handle_wiki(self, evt: MessageEvent, body: str) -> None:
        """Handle ``!wiki`` / ``!wiki status`` and ``!wiki refresh``."""
        body = body.strip().lower()
        if body == "refresh":
            # Only the refresh itself is guarded: a failure to send the
            # success reply must not be reported as a failed refresh.
            try:
                await self._refresh()
            except Exception as e:
                self.log.exception("manual wiki refresh failed")
                await evt.reply(f"Refresh failed: {e}")
            else:
                await evt.reply(f"🔄 Refreshed {len(self._index)} docs.")
            return
        if body in ("", "status"):
            age = int(time.time() - self._last_refresh) if self._last_refresh else None
            age_str = f"{age}s ago" if age is not None else "never"
            await evt.reply(
                f"**{self.config['site_name']}** — {len(self._index)} docs, "
                f"last refresh: {age_str}\n"
                f"Source: {_make_url(self.config['docs_url'], self.config['index_path'])}\n"
                f"Use `!ask <query>` or `!doc <slug-or-title>`."
            )
            return
        await evt.reply("Usage: `!wiki` (status) or `!wiki refresh` (re-index).")