'use strict'; const http = require('http'); const EMBED_DIM = 768; // matches nomic-embed-text output dimensions let mode = 'tfidf'; // ── Ollama helpers ───────────────────────────────────────────────────────── function ollamaGet(urlPath) { const base = process.env.OLLAMA_URL || 'http://localhost:11434'; const url = new URL(urlPath, base); return new Promise((resolve, reject) => { const req = http.get(url.toString(), (res) => { let body = ''; res.on('data', (c) => (body += c)); res.on('end', () => { try { resolve(JSON.parse(body)); } catch (e) { reject(e); } }); }); req.on('error', reject); req.setTimeout(5000, () => { req.destroy(new Error('ollama probe timeout')); }); }); } function ollamaPost(urlPath, payload) { const base = process.env.OLLAMA_URL || 'http://localhost:11434'; const url = new URL(urlPath, base); const data = JSON.stringify(payload); return new Promise((resolve, reject) => { const options = { method: 'POST', headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(data), }, }; const req = http.request(url.toString(), options, (res) => { let body = ''; res.on('data', (c) => (body += c)); res.on('end', () => { try { resolve(JSON.parse(body)); } catch (e) { reject(e); } }); }); req.on('error', reject); req.setTimeout(30000, () => { req.destroy(new Error('embed timeout')); }); req.write(data); req.end(); }); } async function probeOllama() { try { const res = await ollamaGet('/api/tags'); const models = (res.models || []).map((m) => m.name || ''); return models.some((n) => n.includes('nomic-embed-text')); } catch { return false; } } async function ollamaEmbed(text) { const res = await ollamaPost('/api/embeddings', { model: 'nomic-embed-text', prompt: text, }); if (!res.embedding) throw new Error('No embedding in Ollama response'); return res.embedding; // number[] } // ── TF-IDF / hashing fallback ─────────────────────────────────────────────── // FNV-1a 32-bit hash function fnv1a(str) { let h = 0x811c9dc5; for (let i = 0; i < str.length; i++) { h ^= str.charCodeAt(i); h = Math.imul(h, 0x01000193) >>> 0; } return h; } function tokenize(text) { return text .toLowerCase() .replace(/[^a-z0-9\s'-]/g, ' ') .split(/\s+/) .filter((t) => t.length > 1); } function tfidfEmbed(text) { const vec = new Float64Array(EMBED_DIM); const tokens = tokenize(text); if (!tokens.length) return Array.from(vec); const tf = {}; for (const t of tokens) tf[t] = (tf[t] || 0) + 1; // Unigrams for (const [term, freq] of Object.entries(tf)) { vec[fnv1a(term) % EMBED_DIM] += freq / tokens.length; } // Bigrams for richer semantic signal for (let i = 0; i < tokens.length - 1; i++) { vec[fnv1a(`${tokens[i]}_${tokens[i + 1]}`) % EMBED_DIM] += 0.5 / tokens.length; } // L2 normalise let norm = 0; for (const v of vec) norm += v * v; norm = Math.sqrt(norm); const out = []; for (let i = 0; i < EMBED_DIM; i++) out.push(norm > 0 ? vec[i] / norm : 0); return out; } // ── Public API ─────────────────────────────────────────────────────────────── async function init() { const hasOllama = await probeOllama(); mode = hasOllama ? 'ollama' : 'tfidf'; const note = mode === 'tfidf' ? ' (nomic-embed-text not found — using keyword fallback)' : ' (nomic-embed-text)'; console.log(`Embedding mode : ${mode}${note}`); return mode; } async function embed(text) { if (mode === 'ollama') { try { return await ollamaEmbed(text); } catch (err) { console.error('Ollama embed failed, falling back to tfidf:', err.message); return tfidfEmbed(text); } } return tfidfEmbed(text); } function getMode() { return mode; } module.exports = { init, embed, getMode };