fix(issues): decode @mention entities without lockfile or new deps

- Drop the `entities` package (CI blocks pnpm-lock.yaml changes on PRs; lockfile reset to master)
- Restore numeric + allowlisted named entity decoding in issues.ts
- Split the Greptile mid-token `&amp;` case into its own test, with a review comment

Made-with: Cursor
2026-03-25 11:21:48 +00:00 · 2026-03-24 15:22:21 +02:00
parent 53f0988006
commit 2735ef1f4a
4 changed files with 7654 additions and 7542 deletions
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/server/package.json
+++ b/server/package.json
@@ -64,7 +64,6 @@
    "dotenv": "^17.0.1",
    "drizzle-orm": "^0.38.4",
    "embedded-postgres": "^18.1.0-beta.16",
-    "entities": "^8.0.0",
    "express": "^5.1.0",
    "hermes-paperclip-adapter": "0.1.1",
    "jsdom": "^28.1.0",
--- a/server/src/tests/normalize-agent-mention-token.test.ts
+++ b/server/src/tests/normalize-agent-mention-token.test.ts
@@ -14,12 +14,16 @@ describe("normalizeAgentMentionToken", () => {
    expect(normalizeAgentMentionToken("Baba&nbsp;")).toBe("Baba");
  });

-  it("decodes named entities mid-token so agent names can include &", () => {
+  // Greptile: entity mid-token (not only trailing) — must decode &amp; to &, not delete the sequence.
+  it("decodes &amp; in the middle of a mention token", () => {
    expect(normalizeAgentMentionToken("Ba&amp;ba")).toBe("Ba&ba");
+  });
+
+  it("decodes &amp; so agent names with ampersands still match", () => {
    expect(normalizeAgentMentionToken("M&amp;M")).toBe("M&M");
  });

-  it("decodes named entities mid-token (e.g. copyright) for full HTML named coverage", () => {
+  it("decodes additional named entities used in rich text (e.g. &copy;)", () => {
    expect(normalizeAgentMentionToken("Agent&copy;Name")).toBe("Agent©Name");
  });

--- a/server/src/services/issues.ts
+++ b/server/src/services/issues.ts
@@ -21,7 +21,6 @@ import {
  projects,
 } from "@paperclipai/db";
 import { extractAgentMentionIds, extractProjectMentionIds } from "@paperclipai/shared";
-import { decodeHTMLStrict } from "entities";
 import { conflict, notFound, unprocessable } from "../errors.js";
 import {
  defaultIssueExecutionWorkspaceSettingsForProject,
@@ -219,12 +218,39 @@ function unreadForUserCondition(companyId: string, userId: string) {
  `;
 }

-/**
- * Decodes HTML character references in a raw @mention capture (WHATWG HTML, strict semicolon form)
- * so rich-text / UI-encoded bodies still match agent names.
- */
/**
 * Named entities commonly emitted in saved issue bodies, keyed by lower-cased entity name.
 * Unknown `&name;` sequences are left unchanged by the decoder.
 */
const WELL_KNOWN_NAMED_HTML_ENTITIES: Readonly<Record<string, string>> = {
  amp: "&",
  apos: "'",
  copy: "\u00A9",
  gt: ">",
  lt: "<",
  nbsp: "\u00A0",
  quot: '"',
  ensp: "\u2002",
  emsp: "\u2003",
  thinsp: "\u2009",
};

/**
 * Matches one character reference: hex numeric (group 1), decimal numeric (group 2),
 * or named (group 3). A single combined pattern lets every reference be decoded in one
 * pass, so the output of one replacement is never re-scanned as a new reference.
 */
const HTML_CHARACTER_REFERENCE = /&(?:#x([0-9a-f]+)|#([0-9]+)|([a-z][a-z0-9]*));/gi;

/**
 * Converts the digits of a numeric character reference into its character.
 *
 * @param digits - Digits captured from the reference (without `&#`, `x`, or `;`).
 * @param radix - 16 for `&#x…;` references, 10 for `&#…;` references.
 * @returns The decoded character, or `null` when the value is not a usable Unicode scalar
 *   value (unparseable, NUL, a surrogate, or above U+10FFFF).
 */
function decodeNumericHtmlEntity(digits: string, radix: 16 | 10): string | null {
  const n = Number.parseInt(digits, radix);
  // Very long digit runs parse to huge values or Infinity; the upper bound rejects them.
  if (Number.isNaN(n) || n <= 0 || n > 0x10ffff) return null;
  // Surrogate code points would yield a lone surrogate, which is not a valid character.
  if (n >= 0xd800 && n <= 0xdfff) return null;
  return String.fromCodePoint(n);
}

/**
 * Decodes HTML character references in a raw @mention capture so UI-encoded bodies match
 * agent names, then trims surrounding whitespace (including a decoded `&nbsp;`).
 *
 * Each reference is decoded exactly once: `&#38;amp;` yields the text `&amp;`, not `&`.
 * References that cannot be decoded (unknown names, invalid code points) are kept verbatim.
 *
 * @param raw - The mention token as captured from the stored body.
 * @returns The decoded, trimmed token.
 */
export function normalizeAgentMentionToken(raw: string): string {
  const decoded = raw.replace(
    HTML_CHARACTER_REFERENCE,
    (full: string, hex: string | undefined, dec: string | undefined, name: string | undefined) => {
      if (hex !== undefined) return decodeNumericHtmlEntity(hex, 16) ?? full;
      if (dec !== undefined) return decodeNumericHtmlEntity(dec, 10) ?? full;
      if (name === undefined) return full;
      const key = name.toLowerCase();
      // Own-property check: names such as `constructor` must not resolve to members
      // inherited from Object.prototype.
      if (!Object.prototype.hasOwnProperty.call(WELL_KNOWN_NAMED_HTML_ENTITIES, key)) return full;
      return WELL_KNOWN_NAMED_HTML_ENTITIES[key] ?? full;
    },
  );
  return decoded.trim();
}

 export function deriveIssueUserContext(