diff --git a/src/content.js b/src/content.js index 38a82e5..015991a 100644 --- a/src/content.js +++ b/src/content.js @@ -5,7 +5,7 @@ window.__memosClipperLoaded = true; (function () { // ── Turndown-lite: a minimal but solid HTML→Markdown converter ────────────── - function htmlToMarkdown(element, isSelection = false) { + function htmlToMarkdown(element, isSelection = false, stripLinks = false) { const clone = element.cloneNode(true); // Remove unwanted elements — comprehensive list covering real-world sites @@ -54,15 +54,40 @@ window.__memosClipperLoaded = true; // Also remove elements that are visually hidden via inline style clone.querySelectorAll('[style*="display:none"],[style*="display: none"],[style*="visibility:hidden"]') .forEach((el) => el.remove()); + + // Remove link-dense blocks (navigation menus, ad link lists, etc.) + // Collect candidates first to avoid mid-iteration detached-node issues. + // Only target outer chrome elements (nav, aside, header, footer, div, section) + // not content containers like article/main, to avoid stripping TOCs in prose. + const linkDenseCandidates = Array.from( + clone.querySelectorAll('nav, aside, header, footer, div, section') + ).filter((el) => { + // Skip if inside the primary content container + if (el.closest('article, main, [role="main"]')) return false; + const totalText = (el.textContent || '').trim().length; + if (totalText < 30) return false; // too short to judge + const linkText = Array.from(el.querySelectorAll('a')) + .reduce((sum, a) => sum + (a.textContent || '').trim().length, 0); + if (linkText / totalText <= 0.65) return false; + // Require that the element has little direct (non-link) text of its own + const directText = Array.from(el.childNodes) + .filter((n) => n.nodeType === Node.TEXT_NODE) + .reduce((sum, n) => sum + n.textContent.trim().length, 0); + return directText < totalText * 0.25; + }); + // Remove outermost candidates only (skip those already inside a removed ancestor) + linkDenseCandidates.forEach((el) => { + if (el.isConnected) el.remove(); + }); } else { // In selection mode, we still want to remove script/style tags if any clone.querySelectorAll('script, style, noscript, template').forEach((el) => el.remove()); } - return nodeToMd(clone).replace(/\n{3,}/g, "\n\n").trim(); + return nodeToMd(clone, { listDepth: 0, ordered: false, index: 0 }, stripLinks).replace(/\n{3,}/g, "\n\n").trim(); } - function nodeToMd(node, ctx = { listDepth: 0, ordered: false, index: 0 }) { + function nodeToMd(node, ctx = { listDepth: 0, ordered: false, index: 0 }, stripLinks = false) { if (node.nodeType === Node.TEXT_NODE) { return node.textContent.replace(/\s+/g, " "); } @@ -71,7 +96,7 @@ window.__memosClipperLoaded = true; const tag = node.tagName.toLowerCase(); const children = () => Array.from(node.childNodes) - .map((c) => nodeToMd(c, ctx)) + .map((c) => nodeToMd(c, ctx, stripLinks)) .join(""); switch (tag) { @@ -93,7 +118,8 @@ window.__memosClipperLoaded = true; case "del": return `~~${children()}~~`; case "code": { const text = node.textContent; - return text.includes("`") ? `\`\`${text}\`\`` : `\`${text}\``; + if (text.includes("`")) return `\`\` ${text} \`\``; + return `\`${text}\``; } case "pre": { const codeEl = node.querySelector("code"); @@ -110,8 +136,9 @@ window.__memosClipperLoaded = true; .join("\n")}\n\n`; case "a": { - const href = node.getAttribute("href") || ""; const text = children().trim(); + if (stripLinks) return text; // just the anchor text, no URL + const href = node.getAttribute("href") || ""; if (!text) return href; try { const abs = new URL(href, location.href).href; @@ -134,24 +161,24 @@ window.__memosClipperLoaded = true; case "ul": { const lines = Array.from(node.children) - .map((li) => `${" ".repeat(ctx.listDepth)}- ${nodeToMd(li, { ...ctx, listDepth: ctx.listDepth + 1 }).trim()}`) + .map((li) => `${" ".repeat(ctx.listDepth)}- ${nodeToMd(li, { ...ctx, listDepth: ctx.listDepth + 1 }, stripLinks).trim()}`) .join("\n"); return `\n\n${lines}\n\n`; } case "ol": { const lines = Array.from(node.children) - .map((li, i) => `${" ".repeat(ctx.listDepth)}${i + 1}. ${nodeToMd(li, { ...ctx, listDepth: ctx.listDepth + 1 }).trim()}`) + .map((li, i) => `${" ".repeat(ctx.listDepth)}${i + 1}. ${nodeToMd(li, { ...ctx, listDepth: ctx.listDepth + 1 }, stripLinks).trim()}`) .join("\n"); return `\n\n${lines}\n\n`; } case "li": return children(); - case "table": return convertTable(node); + case "table": return convertTable(node, stripLinks); case "figure": { const img = node.querySelector("img"); const caption = node.querySelector("figcaption"); - let md = img ? nodeToMd(img, ctx) : children(); + let md = img ? nodeToMd(img, ctx, stripLinks) : children(); if (caption) md += `\n*${caption.textContent.trim()}*`; return `\n\n${md}\n\n`; } @@ -178,13 +205,16 @@ window.__memosClipperLoaded = true; } } - function convertTable(table) { + function convertTable(table, stripLinks = false) { const rows = Array.from(table.querySelectorAll("tr")); if (!rows.length) return ""; const toRow = (tr) => "| " + Array.from(tr.querySelectorAll("th,td")) - .map((c) => c.textContent.trim().replace(/\|/g, "\\|")) + .map((c) => { + const text = stripLinks ? c.textContent.trim() : nodeToMd(c).trim(); + return text.replace(/\|/g, "\\|"); + }) .join(" | ") + " |"; const header = toRow(rows[0]); @@ -234,6 +264,7 @@ window.__memosClipperLoaded = true; let markdown = ""; let images = []; let title = document.title || location.href; + const stripLinks = !!msg.stripLinks; if (msg.mode === "selection") { const sel = window.getSelection(); @@ -241,7 +272,7 @@ window.__memosClipperLoaded = true; const frag = sel.getRangeAt(0).cloneContents(); const div = document.createElement("div"); div.appendChild(frag); - markdown = htmlToMarkdown(div, true); + markdown = htmlToMarkdown(div, true, stripLinks); images = extractImages(div); } else { markdown = ""; @@ -253,7 +284,7 @@ window.__memosClipperLoaded = true; document.querySelector("main") || document.querySelector('[role="main"]') || document.body; - markdown = htmlToMarkdown(root); + markdown = htmlToMarkdown(root, false, stripLinks); images = extractImages(root); } diff --git a/src/popup.css b/src/popup.css index 87ee198..b738083 100644 --- a/src/popup.css +++ b/src/popup.css @@ -315,6 +315,16 @@ header { .img-chip .remove-img:hover { color: var(--error); } .img-chip.skipped { opacity: .4; } +/* ── Options row ── */ +#options-row { + display: flex; + align-items: center; + gap: 12px; + padding: 6px 14px; + border-bottom: 1px solid var(--border); + background: var(--surface); +} + /* ── Tags row ── */ #tags-row { padding: 6px 14px; @@ -414,5 +424,21 @@ select option { background: var(--surface); } ::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; } ::-webkit-scrollbar-thumb:hover { background: var(--text-muted); } -/* mode badge */ -#mode-toggle.mode-selection { color: var(--accent); background: var(--accent-dim); border-color: var(--accent); } +/* mode button */ +.mode-btn { + background: transparent; + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 4px 8px; + cursor: pointer; + color: var(--text-dim); + display: flex; + align-items: center; + gap: 4px; + font-family: var(--font); + font-size: 11px; + font-weight: 500; + transition: all .12s; +} +.mode-btn:hover { border-color: var(--accent); color: var(--text); } +.mode-btn.mode-selection { color: var(--accent); background: var(--accent-dim); border-color: var(--accent); } diff --git a/src/popup.html b/src/popup.html index c263ee5..35cba89 100644 --- a/src/popup.html +++ b/src/popup.html @@ -34,10 +34,11 @@ Clip to Memos