require('dotenv').config({ path: require('path').resolve(__dirname, '../.env') }); const axios = require('axios'); const OpenAI = require('openai'); const WORKFLOW_VALIDATE_FIELDS = process.env.WORKFLOW_VALIDATE_FIELDS; const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1'; const BRAND_LLM_MODEL = 'openai/gpt-4o'; const TEMPLATE_LLM_MODEL = 'openai/gpt-4o'; const CURL_LLM_MODEL = 'openai/gpt-4o-mini'; const EDIT_CHECK_LLM_MODEL = 'openai/gpt-4o-mini'; if (!WORKFLOW_VALIDATE_FIELDS) throw new Error('Missing WORKFLOW_VALIDATE_FIELDS environment variable'); const DLT_VARIABLE_SPECS = [ { token: '{#numeric#}', label: '#numeric', purpose: 'Digits-only dynamic values such as OTPs, amounts, or numeric IDs.', validation: 'Only digits are allowed.', }, { token: '{#url#}', label: '#url', purpose: 'Web links.', validation: 'Must resolve to a valid registered HTTP(S) URL.', }, { token: '{#urlott#}', label: '#urlott', purpose: 'OTT or app-download links.', validation: 'Must resolve to a valid registered OTT or APK URL.', }, { token: '{#cbn#}', label: '#cbn', purpose: 'Callback phone numbers.', validation: 'Must resolve to a valid registered callback number.', }, { token: '{#email#}', label: '#email', purpose: 'Email addresses.', validation: 'Must resolve to a syntactically valid email address.', }, { token: '{#alphanumeric#}', label: '#alphanumeric', purpose: 'Mixed letter-and-number values such as order IDs or booking references.', validation: 'Letters and numbers only; avoid spaces and special characters.', }, ]; const LEGACY_DLT_VAR_TOKEN = '{#var#}'; const SUPPORTED_DLT_TOKENS = [LEGACY_DLT_VAR_TOKEN, ...DLT_VARIABLE_SPECS.map((spec) => spec.token)]; const SUPPORTED_DLT_TOKEN_SET = new Set(SUPPORTED_DLT_TOKENS); const DLT_PLACEHOLDER_REGEX = /\{#(?:var|numeric|url|urlott|cbn|email|alphanumeric)#\}/g; const DLT_PLACEHOLDER_LIKE_REGEX = /\{#[^{}]*#\}/g; const TRAI_RULES_TEXT = [ '1) Keep the SMS within 160 characters.', `2) Use only approved placeholders: ${SUPPORTED_DLT_TOKENS.join(', ')}.`, `3) Prefer typed placeholders (${DLT_VARIABLE_SPECS.map((spec) => spec.token).join(', ')}) whenever the value clearly matches that type.`, `4) Use ${LEGACY_DLT_VAR_TOKEN} only as a generic fallback for free-form values such as names, product titles, or addresses that do not fit a stricter typed token.`, '5) Keep the message strictly transactional: no promotional language.', '6) Do not include raw URLs unless the event genuinely requires a link and the placeholder type is appropriate.', '7) Do not append a brand or sender signature in the message body unless the exact registered sender ID is explicitly known and required.', '8) Sender identifiers must remain DLT-compliant.', '9) Allowed punctuation only; avoid malformed symbols or placeholder fragments.', '10) The message must match the event and start with clear order or event context.', ].join(' '); const BRAND_CONTEXT_TONE_OPTIONS = ['friendly', 'professional', 'formal', 'casual', 'energetic']; const EVENT_DESCRIPTIONS = { placed: 'The customer has successfully placed an order', confirmed: 'The order has been confirmed by the seller/warehouse', dp_assigned: 'A delivery partner has been assigned to deliver the order', pack: 'The order has been packed and is ready for dispatch', cancelled: 'The order has been cancelled', delivery_done: 'The order has been successfully delivered to the customer', }; let cachedClient = null; function normalizeText(value) { return typeof value === 'string' ? value.trim() : ''; } function describeDltVariableTypes() { return DLT_VARIABLE_SPECS .map((spec) => `- ${spec.token}: ${spec.purpose} ${spec.validation}`) .join('\n'); } function getUnsupportedDltTokens(text) { return (String(text).match(DLT_PLACEHOLDER_LIKE_REGEX) || []) .filter((token) => !SUPPORTED_DLT_TOKEN_SET.has(token)); } function hasMalformedDltFragments(text) { const stripped = String(text).replace(DLT_PLACEHOLDER_LIKE_REGEX, ''); return stripped.includes('{#') || stripped.includes('#}'); } function validateTemplateStructure(text) { const normalized = normalizeText(text); if (!normalized) return 'Template is empty.'; if (normalized.length > 160) return 'Template exceeds 160 characters.'; const unsupportedTokens = getUnsupportedDltTokens(normalized); if (unsupportedTokens.length > 0) { return `Template uses unsupported placeholders: ${unsupportedTokens.join(', ')}.`; } if (hasMalformedDltFragments(normalized)) { return 'Template contains malformed placeholder text.'; } return ''; } function escapeRegex(value) { return String(value || '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } function buildPhraseRegex(phrase) { const normalized = normalizeText(phrase).replace(/\s+/g, ' '); if (!normalized) return null; const parts = normalized.split(' ').filter(Boolean).map(escapeRegex); if (parts.length === 0) return null; return new RegExp(`(^|[^a-z0-9])${parts.join('\\s+')}([^a-z0-9]|$)`, 'i'); } function getBlockedBrandPhrases(options = {}) { const phrases = [ options?.brandName, ...(Array.isArray(options?.brandTaglines) ? options.brandTaglines : []), ] .map((value) => normalizeText(value)) .filter(Boolean); return [...new Set(phrases)]; } function findBlockedBrandPhrase(text, options = {}) { const normalizedText = normalizeText(text); if (!normalizedText) return ''; return getBlockedBrandPhrases(options).find((phrase) => { const matcher = buildPhraseRegex(phrase); return matcher ? matcher.test(normalizedText) : false; }) || ''; } function requestId(prefix) { return `${prefix}_${Date.now()}`; } function parseJsonField(value, fallback) { if (typeof value !== 'string') return value ?? fallback; try { return JSON.parse(value); } catch { return fallback; } } function extractMessageText(content) { if (typeof content === 'string') return content.trim(); if (Array.isArray(content)) { return content .map((entry) => { if (typeof entry === 'string') return entry; if (entry && typeof entry.text === 'string') return entry.text; return ''; }) .join('') .trim(); } return ''; } function tryParseJson(text) { const trimmed = normalizeText(text); if (!trimmed) return null; try { return JSON.parse(trimmed); } catch { // fall through } const fencedMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/i); if (fencedMatch?.[1]) { try { return JSON.parse(fencedMatch[1].trim()); } catch { // fall through } } const firstBrace = trimmed.indexOf('{'); const lastBrace = trimmed.lastIndexOf('}'); if (firstBrace >= 0 && lastBrace > firstBrace) { try { return JSON.parse(trimmed.slice(firstBrace, lastBrace + 1)); } catch { // fall through } } return null; } function isAbsoluteHttpUrl(value) { if (!normalizeText(value)) return false; try { const parsed = new URL(value); return parsed.protocol === 'http:' || parsed.protocol === 'https:'; } catch { return false; } } function getLlmClient() { if (cachedClient) return cachedClient; const apiKey = normalizeText(process.env.OPENROUTER_API_KEY); if (!apiKey) { throw new Error('OPENROUTER_API_KEY is not configured'); } const referer = normalizeText(process.env.EXTENSION_BASE_URL); const appName = 'SMS Extension'; const defaultHeaders = {}; if (referer) defaultHeaders['HTTP-Referer'] = referer; if (appName) defaultHeaders['X-Title'] = appName; cachedClient = new OpenAI({ apiKey, baseURL: OPENROUTER_BASE_URL, defaultHeaders, }); return cachedClient; } async function requestStructuredJson({ model, taskName, systemPrompt, userPrompt, temperature = 0.2 }) { try { const client = getLlmClient(); const completion = await client.chat.completions.create({ model, temperature, response_format: { type: 'json_object' }, messages: [ { role: 'system', content: systemPrompt }, { role: 'user', content: userPrompt }, ], }); const text = extractMessageText(completion?.choices?.[0]?.message?.content); const parsed = tryParseJson(text); if (!parsed || typeof parsed !== 'object') { throw new Error(`${taskName} returned unreadable JSON`); } return parsed; } catch (error) { const details = error.response?.data ? ` | response: ${JSON.stringify(error.response.data)}` : ''; throw new Error(`${taskName} failed: ${error.message}${details}`); } } async function postWorkflow(url, payload) { try { const response = await axios.post(url, payload, { headers: { 'Content-Type': 'application/json' }, maxBodyLength: Infinity, timeout: 60000, }); return response.data; } catch (error) { const details = error.response?.data ? ` | response: ${JSON.stringify(error.response.data)}` : ''; throw new Error(`Workflow API error (${url}): ${error.message}${details}`); } } function sanitizeStringArray(value, options = {}) { const { maxItems = Infinity, allowUrlsOnly = false } = options; if (!Array.isArray(value)) return []; const seen = new Set(); const items = []; value.forEach((entry) => { if (items.length >= maxItems) return; const normalized = normalizeText(String(entry || '')); if (!normalized) return; if (allowUrlsOnly && !isAbsoluteHttpUrl(normalized)) return; if (seen.has(normalized)) return; seen.add(normalized); items.push(normalized); }); return items; } function sanitizeVariableMap(value) { if (!value || typeof value !== 'object' || Array.isArray(value)) return {}; return Object.entries(value).reduce((accumulator, [key, rawValue]) => { const normalizedKey = normalizeText(String(key || '')); const normalizedValue = normalizeText(String(rawValue || '')); if (!normalizedKey || !normalizedValue) return accumulator; accumulator[normalizedKey] = normalizedValue; return accumulator; }, {}); } async function parseBrandContext(scrapedData = {}) { const representativePages = Array.isArray(scrapedData.representativePages) ? scrapedData.representativePages.slice(0, 20) : []; const representativeTextBlocks = Array.isArray(scrapedData.representativeTextBlocks) ? scrapedData.representativeTextBlocks.slice(0, 20) : []; const productPages = Array.isArray(scrapedData.productPages) ? scrapedData.productPages.slice(0, 5) : []; const contentDigest = representativeTextBlocks .map((block) => { const title = String(block?.title || '').trim(); const pageType = String(block?.pageType || '').trim(); const text = String(block?.text || '').trim(); return [title, pageType, text].filter(Boolean).join(' | '); }) .filter(Boolean) .join('\n\n') .slice(0, 14000); const result = await requestStructuredJson({ model: BRAND_LLM_MODEL, taskName: 'Brand context extraction', temperature: 0.2, systemPrompt: 'You are a brand analyst for ecommerce storefronts. Infer brand identity from crawl evidence and return only valid JSON that matches the requested schema exactly.', userPrompt: [ 'Analyze the storefront evidence below and infer brand context.', '', 'Return only valid JSON with exactly these keys:', '{', ' "brandName": "string",', ` "tone": "one of ${BRAND_CONTEXT_TONE_OPTIONS.join(', ')}",`, ' "taglines": ["up to 3 strings"],', ' "colors": ["hex colors only"],', ' "relevantImageUrls": ["3-5 absolute http(s) image URLs only"],', ' "aboutSummary": "2-4 concise customer-facing sentences"', '}', '', 'Constraints:', '- No markdown.', '- No explanatory prose.', '- Do not copy the About page verbatim.', '- Exclude icons, tracking pixels, and data URLs from images.', '', `start_url: ${String(scrapedData.startUrl || '')}`, `domain: ${String(scrapedData.domain || '')}`, `site_stats_json: ${JSON.stringify(scrapedData.siteStats || {})}`, `homepage_json: ${JSON.stringify(scrapedData.homepage || {})}`, `about_page_json: ${JSON.stringify(scrapedData.aboutPage || {})}`, `product_pages_json: ${JSON.stringify(productPages)}`, `contact_page_json: ${JSON.stringify(scrapedData.contactPage || {})}`, `representative_pages_json: ${JSON.stringify(representativePages)}`, `representative_text_blocks_json: ${JSON.stringify(representativeTextBlocks)}`, `navigation_json: ${JSON.stringify(scrapedData.navigation || [])}`, `policy_pages_json: ${JSON.stringify(scrapedData.policyPages || [])}`, `links_json: ${JSON.stringify(scrapedData.links || [])}`, `top_images_json: ${JSON.stringify(scrapedData.topImages || [])}`, `screenshots_json: ${JSON.stringify(scrapedData.screenshots || [])}`, `branding_json: ${JSON.stringify(scrapedData.branding || {})}`, `crawl_summary_json: ${JSON.stringify(scrapedData || {})}`, `content_digest: ${contentDigest}`, ].join('\n'), }); const normalizedTone = normalizeText(String(result.tone || '')).toLowerCase(); return { brandName: normalizeText(String(result.brandName || '')) || 'Unknown Brand', tone: BRAND_CONTEXT_TONE_OPTIONS.includes(normalizedTone) ? normalizedTone : 'professional', taglines: sanitizeStringArray(result.taglines, { maxItems: 3 }), colors: sanitizeStringArray(result.colors), relevantImageUrls: sanitizeStringArray(result.relevantImageUrls, { maxItems: 5, allowUrlsOnly: true }), aboutSummary: normalizeText(String(result.aboutSummary || '')), }; } async function generateTemplates(brandContext = {}, eventSlug, eventLabel, options = {}) { const eventDesc = EVENT_DESCRIPTIONS[eventSlug] || `A "${eventLabel}" event in the order lifecycle`; const registeredSenderId = normalizeText(options?.senderId).toUpperCase(); const blockedBrandPhrases = getBlockedBrandPhrases({ brandName: brandContext?.brandName, brandTaglines: brandContext?.taglines, }); const approvedTemplates = []; const seenTemplates = new Set(); const rejectionReasons = []; for (let attempt = 0; attempt < 2 && approvedTemplates.length < 3; attempt += 1) { const templateCount = attempt === 0 ? 6 : 8; const result = await requestStructuredJson({ model: TEMPLATE_LLM_MODEL, taskName: 'SMS template generation', temperature: 0.45, systemPrompt: 'You are an expert in Indian transactional SMS templates. Follow the provided constraints exactly, self-check against them, and return only valid JSON.', userPrompt: [ `Generate exactly ${templateCount} distinct transactional SMS templates.`, '', `Brand: ${String(brandContext.brandName || '')}`, `Tone: ${String(brandContext.tone || '')}`, `Taglines: ${JSON.stringify(Array.isArray(brandContext.taglines) ? brandContext.taglines : [])}`, `Event slug: ${String(eventSlug || '')}`, `Event label: ${String(eventLabel || '')}`, `Event description: ${eventDesc}`, `Registered sender ID: ${registeredSenderId || 'Not provided. Do not append any brand or sender signature.'}`, '', `Rules: ${TRAI_RULES_TEXT}`, '', 'Approved placeholder types:', describeDltVariableTypes(), `- ${LEGACY_DLT_VAR_TOKEN}: Generic fallback for free-form values such as customer names, product names, or addresses when a stricter typed token does not fit.`, '', 'Each template must:', '- be under 160 characters', '- start with clear event or order context', '- match the event accurately', '- avoid promotional language', '- avoid raw URLs unless clearly required for the event', '- never mention the brand name or tagline in the message body unless the exact registered sender ID is explicitly required and provided', blockedBrandPhrases.length > 0 ? `- specifically do not include these phrases: ${blockedBrandPhrases.join(', ')}` : '', '', rejectionReasons.length > 0 ? `Avoid these issues seen in rejected drafts: ${rejectionReasons.slice(-6).join(' | ')}` : '', '', 'Return only valid JSON with exactly this shape:', `{ "templates": ["template 1", "template 2", "... up to ${templateCount} templates"] }`, ].filter(Boolean).join('\n'), }); const candidateTemplates = sanitizeStringArray(result.templates, { maxItems: templateCount }); for (const candidate of candidateTemplates) { if (approvedTemplates.length >= 3) break; if (seenTemplates.has(candidate)) continue; seenTemplates.add(candidate); const structureIssue = validateTemplateStructure(candidate); if (structureIssue) { rejectionReasons.push(structureIssue); continue; } const blockedPhrase = findBlockedBrandPhrase(candidate, { brandName: brandContext?.brandName, brandTaglines: brandContext?.taglines, }); if (blockedPhrase) { rejectionReasons.push(`Do not mention "${blockedPhrase}" in the SMS body.`); continue; } const validation = await validateEditedTemplate(candidate, { senderId: registeredSenderId, eventSlug, eventLabel, brandName: brandContext?.brandName, brandTaglines: brandContext?.taglines, }); if (validation.approved) { approvedTemplates.push(candidate); continue; } if (validation.why) { rejectionReasons.push(validation.why); } } } if (approvedTemplates.length < 3) { throw new Error('Could not generate 3 compliant templates. Please try again.'); } return approvedTemplates.slice(0, 3); } async function processCurl(rawCurl, approvedTemplate, eventSlug) { const result = await requestStructuredJson({ model: CURL_LLM_MODEL, taskName: 'Provider cURL processing', temperature: 0.1, systemPrompt: 'You are an SMS provider integration expert. Analyze raw provider curls, infer semantic placeholders, and return only valid JSON.', userPrompt: [ 'Analyze the provider cURL and return a structured placeholder mapping.', '', `Approved SMS template:\n${String(approvedTemplate || '')}`, '', `Event slug: ${String(eventSlug || '')}`, '', `Raw cURL:\n${String(rawCurl || '')}`, '', 'Instructions:', '- identify all placeholder formats in the cURL', '- infer semantic field names in camelCase', '- normalize placeholders inside processedCurl using those camelCase field names', '- build variableMap using the exact DLT token text from the approved template in appearance order', `- supported DLT token types include ${SUPPORTED_DLT_TOKENS.join(', ')}`, '', 'Return only valid JSON with exactly this shape:', '{', ' "processedCurl": "string",', ' "variableMap": { "{#numeric#}[0]": "fieldName", "{#var#}[1]": "fieldName" }', '}', ].join('\n'), }); return { processedCurl: String(result.processedCurl || ''), variableMap: sanitizeVariableMap(result.variableMap), }; } async function validateEditedTemplate(editedTemplate, options = {}) { const structureIssue = validateTemplateStructure(editedTemplate); if (structureIssue) { return { approved: false, why: structureIssue, workflowResult: { approved: false, why: structureIssue, source: 'deterministic' }, }; } const registeredSenderId = normalizeText(options?.senderId).toUpperCase(); const eventSlug = normalizeText(options?.eventSlug); const eventLabel = normalizeText(options?.eventLabel); const brandName = normalizeText(options?.brandName); const blockedBrandPhrase = findBlockedBrandPhrase(editedTemplate, options); if (blockedBrandPhrase) { return { approved: false, why: `Remove the brand reference "${blockedBrandPhrase}" from the message body.`, workflowResult: { approved: false, why: `Blocked brand phrase: ${blockedBrandPhrase}`, source: 'deterministic' }, }; } const result = await requestStructuredJson({ model: EDIT_CHECK_LLM_MODEL, taskName: 'Edited template validation', temperature: 0, systemPrompt: 'You validate Indian transactional SMS templates for compliance and clarity. Return only valid JSON.', userPrompt: [ 'Review this edited SMS template and decide whether it should be approved.', '', `Template:\n${String(editedTemplate || '')}`, '', eventSlug ? `Event slug: ${eventSlug}` : '', eventLabel ? `Event label: ${eventLabel}` : '', brandName ? `Brand name: ${brandName}` : '', `Registered sender ID: ${registeredSenderId || 'Not provided. Reject appended brand or sender signatures.'}`, '', `Rules: ${TRAI_RULES_TEXT}`, '', 'Approved placeholder types:', describeDltVariableTypes(), `- ${LEGACY_DLT_VAR_TOKEN}: Generic fallback for free-form values such as names, product names, or addresses when a stricter typed token does not fit.`, '', 'Approval guidance:', '- approve only if the template is clear, transactional, and appears compliant with the rules', '- approve typed placeholders like {#numeric#}, {#url#}, {#urlott#}, {#cbn#}, {#email#}, and {#alphanumeric#} when they match the intended dynamic value type', `- allow ${LEGACY_DLT_VAR_TOKEN} only as a generic fallback for free-form content that does not fit a stricter typed token`, '- reject if a more precise typed token should clearly replace a generic one for numeric, URL, callback, email, or alphanumeric values', '- reject if the message mentions the brand name, tagline, or a brand-style signoff in the body', '- reject if the message appends a sender signature that does not exactly match the registered sender ID', '- reject if it is too promotional, malformed, ambiguous, or clearly non-compliant', '- keep the explanation concise and actionable', '', 'Return only valid JSON with exactly this shape:', '{ "approved": true, "why": "short explanation" }', ].join('\n'), }); const approved = typeof result.approved === 'boolean' ? result.approved : ['approved', 'pass', 'passed', 'valid', 'ok', 'true'].includes(normalizeText(String(result.approved || result.status || '')).toLowerCase()); return { approved, why: normalizeText(String(result.why || result.reason || result.message || '')), workflowResult: result, }; } async function validateCurlFields(rawCurl) { const payload = { curl_b64: Buffer.from(String(rawCurl || ''), 'utf8').toString('base64'), }; const data = await postWorkflow(WORKFLOW_VALIDATE_FIELDS, payload); const output = typeof data === 'string' ? parseJsonField(data, {}) : (data || {}); const isValidCurl = output.is_valid_curl === true || String(output.is_valid_curl).toLowerCase() === 'true'; return { isValidCurl, provider: { providerName: String(output.provider_name || '').trim(), senderId: String(output.dlt_sender_id || '').trim().toUpperCase(), dltEntityId: String(output.dlt_entity_id || '').trim(), authKey: String(output.api_auth_key || '').trim(), }, reason: String(output.reason || '').trim(), }; } module.exports = { parseBrandContext, generateTemplates, processCurl, validateEditedTemplate, validateCurlFields, };