sms-extension/server/services/openai2.js

637 lines
25 KiB
JavaScript

require('dotenv').config({ path: require('path').resolve(__dirname, '../.env') });
const axios = require('axios');
const OpenAI = require('openai');
// URL that validateCurlFields posts to (via postWorkflow); read from the environment.
const WORKFLOW_VALIDATE_FIELDS = process.env.WORKFLOW_VALIDATE_FIELDS;
// Base URL handed to the OpenAI SDK client in getLlmClient.
const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1';
// Model identifiers, one per LLM task in this module.
const BRAND_LLM_MODEL = 'openai/gpt-4o';
const TEMPLATE_LLM_MODEL = 'openai/gpt-4o';
const CURL_LLM_MODEL = 'openai/gpt-4o-mini';
const EDIT_CHECK_LLM_MODEL = 'openai/gpt-4o-mini';
// Fail at module load time when the workflow endpoint is not configured.
if (!WORKFLOW_VALIDATE_FIELDS) throw new Error('Missing WORKFLOW_VALIDATE_FIELDS environment variable');
// Typed DLT placeholder tokens. `purpose` and `validation` are rendered into the
// LLM prompts by describeDltVariableTypes(); `label` is not read in the visible code.
const DLT_VARIABLE_SPECS = [
{
token: '{#numeric#}',
label: '#numeric',
purpose: 'Digits-only dynamic values such as OTPs, amounts, or numeric IDs.',
validation: 'Only digits are allowed.',
},
{
token: '{#url#}',
label: '#url',
purpose: 'Web links.',
validation: 'Must resolve to a valid registered HTTP(S) URL.',
},
{
token: '{#urlott#}',
label: '#urlott',
purpose: 'OTT or app-download links.',
validation: 'Must resolve to a valid registered OTT or APK URL.',
},
{
token: '{#cbn#}',
label: '#cbn',
purpose: 'Callback phone numbers.',
validation: 'Must resolve to a valid registered callback number.',
},
{
token: '{#email#}',
label: '#email',
purpose: 'Email addresses.',
validation: 'Must resolve to a syntactically valid email address.',
},
{
token: '{#alphanumeric#}',
label: '#alphanumeric',
purpose: 'Mixed letter-and-number values such as order IDs or booking references.',
validation: 'Letters and numbers only; avoid spaces and special characters.',
},
];
// Untyped placeholder, described in the prompts as a generic fallback.
const LEGACY_DLT_VAR_TOKEN = '{#var#}';
// Every token a template may contain: the legacy token followed by the typed ones.
const SUPPORTED_DLT_TOKENS = [LEGACY_DLT_VAR_TOKEN, ...DLT_VARIABLE_SPECS.map((spec) => spec.token)];
// Set form of the list above, used for membership checks in getUnsupportedDltTokens.
const SUPPORTED_DLT_TOKEN_SET = new Set(SUPPORTED_DLT_TOKENS);
// Matches exactly the supported tokens (hand-written alternation, global flag).
// NOTE(review): not referenced in the portion of the file visible here, and it must be
// kept in sync with DLT_VARIABLE_SPECS by hand.
const DLT_PLACEHOLDER_REGEX = /\{#(?:var|numeric|url|urlott|cbn|email|alphanumeric)#\}/g;
// Matches anything shaped like `{#...#}` with no braces inside, supported or not.
const DLT_PLACEHOLDER_LIKE_REGEX = /\{#[^{}]*#\}/g;
// Numbered compliance rules, joined with single spaces into one string. The string is
// interpolated into the template-generation and template-validation prompts.
const TRAI_RULES_TEXT = [
'1) Keep the SMS within 160 characters.',
`2) Use only approved placeholders: ${SUPPORTED_DLT_TOKENS.join(', ')}.`,
`3) Prefer typed placeholders (${DLT_VARIABLE_SPECS.map((spec) => spec.token).join(', ')}) whenever the value clearly matches that type.`,
`4) Use ${LEGACY_DLT_VAR_TOKEN} only as a generic fallback for free-form values such as names, product titles, or addresses that do not fit a stricter typed token.`,
'5) Keep the message strictly transactional: no promotional language.',
'6) Do not include raw URLs unless the event genuinely requires a link and the placeholder type is appropriate.',
'7) Do not append a brand or sender signature in the message body unless the exact registered sender ID is explicitly known and required.',
'8) Sender identifiers must remain DLT-compliant.',
'9) Allowed punctuation only; avoid malformed symbols or placeholder fragments.',
'10) The message must match the event and start with clear order or event context.',
].join(' ');
// Tone values accepted from the brand-context LLM response; parseBrandContext falls
// back to 'professional' for anything else.
const BRAND_CONTEXT_TONE_OPTIONS = ['friendly', 'professional', 'formal', 'casual', 'energetic'];
// Event slug -> description sentence inserted into the template-generation prompt.
const EVENT_DESCRIPTIONS = {
placed: 'The customer has successfully placed an order',
confirmed: 'The order has been confirmed by the seller/warehouse',
dp_assigned: 'A delivery partner has been assigned to deliver the order',
pack: 'The order has been packed and is ready for dispatch',
cancelled: 'The order has been cancelled',
delivery_done: 'The order has been successfully delivered to the customer',
};
// Memoized OpenAI SDK client; created on first use by getLlmClient().
let cachedClient = null;
/**
 * Trim a string value.
 *
 * @param {*} value - Candidate value.
 * @returns {string} The trimmed string, or '' when `value` is not a string.
 */
function normalizeText(value) {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
/**
 * Render the typed DLT placeholder specs as a bulleted, newline-separated list
 * suitable for embedding in a prompt.
 *
 * @returns {string} One "- token: purpose validation" line per spec.
 */
function describeDltVariableTypes() {
  const bulletLines = [];
  for (const { token, purpose, validation } of DLT_VARIABLE_SPECS) {
    bulletLines.push(`- ${token}: ${purpose} ${validation}`);
  }
  return bulletLines.join('\n');
}
/**
 * List every `{#...#}`-shaped token in the text that is not a supported DLT token.
 *
 * @param {*} text - Template text (coerced with String()).
 * @returns {string[]} Unsupported tokens in order of appearance (duplicates kept).
 */
function getUnsupportedDltTokens(text) {
  const placeholderLike = String(text).match(DLT_PLACEHOLDER_LIKE_REGEX);
  const unsupported = [];
  if (placeholderLike === null) {
    return unsupported;
  }
  for (const candidate of placeholderLike) {
    if (!SUPPORTED_DLT_TOKEN_SET.has(candidate)) {
      unsupported.push(candidate);
    }
  }
  return unsupported;
}
/**
 * Detect stray placeholder delimiters: after removing every well-formed
 * `{#...#}` span, any remaining "{#" or "#}" is treated as a malformed fragment.
 *
 * @param {*} text - Template text (coerced with String()).
 * @returns {boolean} True when a dangling delimiter remains.
 */
function hasMalformedDltFragments(text) {
  const remainder = String(text).replace(DLT_PLACEHOLDER_LIKE_REGEX, '');
  return ['{#', '#}'].some((delimiter) => remainder.includes(delimiter));
}
/**
 * Run the deterministic (non-LLM) checks on a template.
 *
 * Checks, in order: non-empty, at most 160 characters, only supported
 * placeholders, no malformed placeholder fragments.
 *
 * @param {*} text - Template text.
 * @returns {string} A human-readable issue, or '' when all checks pass.
 */
function validateTemplateStructure(text) {
  const template = normalizeText(text);
  if (template.length === 0) {
    return 'Template is empty.';
  }
  if (template.length > 160) {
    return 'Template exceeds 160 characters.';
  }

  const rejectedTokens = getUnsupportedDltTokens(template);
  if (rejectedTokens.length !== 0) {
    return `Template uses unsupported placeholders: ${rejectedTokens.join(', ')}.`;
  }

  return hasMalformedDltFragments(template)
    ? 'Template contains malformed placeholder text.'
    : '';
}
/**
 * Escape regular-expression metacharacters so the value can be embedded
 * literally in a RegExp source.
 *
 * @param {*} value - Value to escape; falsy values are treated as ''.
 * @returns {string} The escaped string.
 */
function escapeRegex(value) {
  const literal = String(value || '');
  return literal.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
/**
 * Build a case-insensitive matcher for a phrase. Words may be separated by any
 * run of whitespace, and the phrase must be delimited on both sides by the
 * string boundary or a character outside [a-z0-9].
 *
 * @param {*} phrase - Phrase to match.
 * @returns {RegExp|null} The matcher, or null when the phrase is empty.
 */
function buildPhraseRegex(phrase) {
  const collapsed = normalizeText(phrase).replace(/\s+/g, ' ');
  if (collapsed === '') {
    return null;
  }

  const escapedWords = [];
  for (const word of collapsed.split(' ')) {
    if (word) {
      escapedWords.push(escapeRegex(word));
    }
  }
  if (escapedWords.length === 0) {
    return null;
  }

  const phraseSource = escapedWords.join('\\s+');
  return new RegExp(`(^|[^a-z0-9])${phraseSource}([^a-z0-9]|$)`, 'i');
}
/**
 * Collect the brand name and taglines that must not appear in an SMS body.
 *
 * @param {{brandName?: *, brandTaglines?: *}} [options] - Brand details.
 * @returns {string[]} Trimmed, non-empty, de-duplicated phrases; brand name first.
 */
function getBlockedBrandPhrases(options = {}) {
  const brandName = options?.brandName;
  const taglines = Array.isArray(options?.brandTaglines) ? options.brandTaglines : [];

  // A Set keeps first-seen order while dropping exact duplicates.
  const uniquePhrases = new Set();
  for (const rawPhrase of [brandName, ...taglines]) {
    const phrase = normalizeText(rawPhrase);
    if (phrase) {
      uniquePhrases.add(phrase);
    }
  }
  return [...uniquePhrases];
}
/**
 * Find the first blocked brand phrase that occurs in the text.
 *
 * @param {*} text - SMS body to inspect.
 * @param {{brandName?: *, brandTaglines?: *}} [options] - Brand details.
 * @returns {string} The matching phrase, or '' when none matches.
 */
function findBlockedBrandPhrase(text, options = {}) {
  const body = normalizeText(text);
  if (body === '') {
    return '';
  }

  for (const phrase of getBlockedBrandPhrases(options)) {
    const pattern = buildPhraseRegex(phrase);
    if (pattern !== null && pattern.test(body)) {
      return phrase;
    }
  }
  return '';
}
/**
 * Build an identifier of the form "<prefix>_<epoch milliseconds>".
 *
 * @param {*} prefix - Leading label for the identifier.
 * @returns {string} The generated identifier.
 */
function requestId(prefix) {
  const issuedAt = Date.now();
  return `${prefix}_${issuedAt}`;
}
/**
 * Decode a field that may arrive either as a JSON string or as an
 * already-decoded value.
 *
 * @param {*} value - JSON text, or a value to pass through.
 * @param {*} fallback - Returned when `value` is null/undefined, is not valid
 *   JSON, or decodes to null.
 * @returns {*} The decoded value, the passed-through value, or `fallback`.
 */
function parseJsonField(value, fallback) {
  if (typeof value !== 'string') return value ?? fallback;
  try {
    // The literal text "null" parses successfully; treat it like a missing value so
    // both branches agree and callers never receive null in place of the fallback.
    return JSON.parse(value) ?? fallback;
  } catch {
    return fallback;
  }
}
/**
 * Flatten chat-completion message content into plain text.
 *
 * Accepts either a string or an array whose entries are strings or objects
 * carrying a string `text` property; other entries contribute nothing.
 *
 * @param {*} content - Message content.
 * @returns {string} Concatenated, trimmed text ('' for unsupported shapes).
 */
function extractMessageText(content) {
  if (typeof content === 'string') {
    return content.trim();
  }
  if (!Array.isArray(content)) {
    return '';
  }

  let combined = '';
  for (const part of content) {
    if (typeof part === 'string') {
      combined += part;
    } else if (part && typeof part.text === 'string') {
      combined += part.text;
    }
  }
  return combined.trim();
}
/**
 * Best-effort JSON extraction from model output.
 *
 * Candidates are tried in order and the first one that parses wins:
 *   1. the whole trimmed text,
 *   2. the body of the first ``` / ```json fenced block,
 *   3. the span from the first "{" to the last "}".
 *
 * @param {*} text - Raw model output.
 * @returns {*} The parsed value, or null when nothing parses.
 */
function tryParseJson(text) {
  const source = normalizeText(text);
  if (source === '') {
    return null;
  }

  const candidates = [source];

  const fence = /```(?:json)?\s*([\s\S]*?)```/i.exec(source);
  if (fence?.[1]) {
    candidates.push(fence[1].trim());
  }

  const openIndex = source.indexOf('{');
  const closeIndex = source.lastIndexOf('}');
  if (openIndex >= 0 && closeIndex > openIndex) {
    candidates.push(source.slice(openIndex, closeIndex + 1));
  }

  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch {
      // Not valid JSON; move on to the next candidate.
    }
  }
  return null;
}
/**
 * Check whether a value is an absolute URL using the http or https scheme.
 *
 * @param {*} value - Candidate URL.
 * @returns {boolean} True only for parseable http(s) URLs.
 */
function isAbsoluteHttpUrl(value) {
  if (normalizeText(value) === '') {
    return false;
  }

  let protocol;
  try {
    ({ protocol } = new URL(value));
  } catch {
    // The URL constructor throws on anything it cannot parse as absolute.
    return false;
  }
  return ['http:', 'https:'].includes(protocol);
}
/**
 * Return the shared OpenAI SDK client pointed at OpenRouter, creating it on
 * first use.
 *
 * Reads OPENROUTER_API_KEY (required) and EXTENSION_BASE_URL (optional, sent
 * as the HTTP-Referer header) from the environment.
 *
 * @returns {OpenAI} The memoized client instance.
 * @throws {Error} When OPENROUTER_API_KEY is missing or blank.
 */
function getLlmClient() {
  if (!cachedClient) {
    const key = normalizeText(process.env.OPENROUTER_API_KEY);
    if (key === '') {
      throw new Error('OPENROUTER_API_KEY is not configured');
    }

    const siteUrl = normalizeText(process.env.EXTENSION_BASE_URL);
    const headers = {
      ...(siteUrl ? { 'HTTP-Referer': siteUrl } : {}),
      'X-Title': 'SMS Extension',
    };

    cachedClient = new OpenAI({
      apiKey: key,
      baseURL: OPENROUTER_BASE_URL,
      defaultHeaders: headers,
    });
  }
  return cachedClient;
}
/**
 * Send a system + user prompt to the chat-completions API in JSON-object mode
 * and return the decoded response object.
 *
 * @param {object} params
 * @param {string} params.model - Model identifier.
 * @param {string} params.taskName - Label used in error messages.
 * @param {string} params.systemPrompt - System message content.
 * @param {string} params.userPrompt - User message content.
 * @param {number} [params.temperature=0.2] - Sampling temperature.
 * @returns {Promise<object>} The parsed JSON value from the first choice.
 * @throws {Error} "<taskName> failed: ..." on any client, transport, or parsing
 *   failure; the underlying error is attached as `cause`.
 */
async function requestStructuredJson({ model, taskName, systemPrompt, userPrompt, temperature = 0.2 }) {
  try {
    const client = getLlmClient();
    const completion = await client.chat.completions.create({
      model,
      temperature,
      response_format: { type: 'json_object' },
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userPrompt },
      ],
    });
    const text = extractMessageText(completion?.choices?.[0]?.message?.content);
    const parsed = tryParseJson(text);
    if (!parsed || typeof parsed !== 'object') {
      throw new Error(`${taskName} returned unreadable JSON`);
    }
    return parsed;
  } catch (error) {
    const details = error.response?.data ? ` | response: ${JSON.stringify(error.response.data)}` : '';
    // Keep the original error (and its stack) reachable for logging/debugging instead of
    // flattening it to a message string. Runtimes without `cause` support ignore the option.
    throw new Error(`${taskName} failed: ${error.message}${details}`, { cause: error });
  }
}
/**
 * POST a JSON payload to a workflow endpoint and return the response body.
 *
 * @param {string} url - Endpoint URL.
 * @param {*} payload - Request body, sent as application/json.
 * @returns {Promise<*>} `response.data` as provided by axios.
 * @throws {Error} "Workflow API error (<url>): ..." on failure; includes the
 *   response body when one was received and carries the original error as `cause`.
 */
async function postWorkflow(url, payload) {
  try {
    const response = await axios.post(url, payload, {
      headers: { 'Content-Type': 'application/json' },
      maxBodyLength: Infinity,
      timeout: 60000,
    });
    return response.data;
  } catch (error) {
    const details = error.response?.data ? ` | response: ${JSON.stringify(error.response.data)}` : '';
    // Preserve the underlying error (status, code, stack) for callers and logs.
    throw new Error(`Workflow API error (${url}): ${error.message}${details}`, { cause: error });
  }
}
/**
 * Normalize a list of values into unique, trimmed, non-empty strings.
 *
 * @param {*} value - Expected to be an array; anything else yields [].
 * @param {object} [options]
 * @param {number} [options.maxItems=Infinity] - Maximum number of items kept.
 * @param {boolean} [options.allowUrlsOnly=false] - Keep only absolute http(s) URLs.
 * @returns {string[]} Cleaned items in first-seen order.
 */
function sanitizeStringArray(value, options = {}) {
  const { maxItems = Infinity, allowUrlsOnly = false } = options;
  if (!Array.isArray(value)) {
    return [];
  }

  // Insertion-ordered Set gives de-duplication and ordering in one structure.
  const kept = new Set();
  for (const entry of value) {
    if (kept.size >= maxItems) {
      break;
    }
    const text = normalizeText(String(entry || ''));
    if (text === '') {
      continue;
    }
    if (allowUrlsOnly && !isAbsoluteHttpUrl(text)) {
      continue;
    }
    kept.add(text);
  }
  return [...kept];
}
/**
 * Normalize a placeholder -> field-name mapping: keys and values are
 * stringified and trimmed, and pairs with an empty key or value are dropped.
 *
 * @param {*} value - Expected to be a non-array object; anything else yields {}.
 * @returns {Object<string, string>} The cleaned mapping.
 */
function sanitizeVariableMap(value) {
  const cleaned = {};
  const isRecord = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  if (!isRecord) {
    return cleaned;
  }

  for (const [key, rawValue] of Object.entries(value)) {
    const placeholder = normalizeText(String(key || ''));
    const fieldName = normalizeText(String(rawValue || ''));
    if (placeholder && fieldName) {
      cleaned[placeholder] = fieldName;
    }
  }
  return cleaned;
}
/**
 * Infer brand context (name, tone, taglines, colors, images, summary) from
 * storefront crawl data by prompting the brand LLM.
 *
 * @param {object} [scrapedData] - Crawl output; every field is optional.
 *   null/undefined are treated as an empty crawl.
 * @returns {Promise<{brandName: string, tone: string, taglines: string[],
 *   colors: string[], relevantImageUrls: string[], aboutSummary: string}>}
 *   Sanitized result: brandName defaults to 'Unknown Brand', tone falls back to
 *   'professional' when not one of BRAND_CONTEXT_TONE_OPTIONS, taglines are
 *   capped at 3 and image URLs at 5 (absolute http(s) only).
 * @throws {Error} When the LLM request fails or returns unreadable JSON.
 */
async function parseBrandContext(scrapedData = {}) {
  // The default parameter only covers `undefined`; guard null (and other falsy input)
  // the same way the crawl_summary_json line already did.
  const crawl = scrapedData || {};

  const representativePages = Array.isArray(crawl.representativePages)
    ? crawl.representativePages.slice(0, 20)
    : [];
  const representativeTextBlocks = Array.isArray(crawl.representativeTextBlocks)
    ? crawl.representativeTextBlocks.slice(0, 20)
    : [];
  const productPages = Array.isArray(crawl.productPages)
    ? crawl.productPages.slice(0, 5)
    : [];

  // Flatten the text blocks into "title | pageType | text" paragraphs, capped in length.
  const contentDigest = representativeTextBlocks
    .map((block) => {
      const title = String(block?.title || '').trim();
      const pageType = String(block?.pageType || '').trim();
      const text = String(block?.text || '').trim();
      return [title, pageType, text].filter(Boolean).join(' | ');
    })
    .filter(Boolean)
    .join('\n\n')
    .slice(0, 14000);

  const result = await requestStructuredJson({
    model: BRAND_LLM_MODEL,
    taskName: 'Brand context extraction',
    temperature: 0.2,
    systemPrompt: 'You are a brand analyst for ecommerce storefronts. Infer brand identity from crawl evidence and return only valid JSON that matches the requested schema exactly.',
    userPrompt: [
      'Analyze the storefront evidence below and infer brand context.',
      '',
      'Return only valid JSON with exactly these keys:',
      '{',
      ' "brandName": "string",',
      ` "tone": "one of ${BRAND_CONTEXT_TONE_OPTIONS.join(', ')}",`,
      ' "taglines": ["up to 3 strings"],',
      ' "colors": ["hex colors only"],',
      ' "relevantImageUrls": ["3-5 absolute http(s) image URLs only"],',
      ' "aboutSummary": "2-4 concise customer-facing sentences"',
      '}',
      '',
      'Constraints:',
      '- No markdown.',
      '- No explanatory prose.',
      '- Do not copy the About page verbatim.',
      '- Exclude icons, tracking pixels, and data URLs from images.',
      '',
      `start_url: ${String(crawl.startUrl || '')}`,
      `domain: ${String(crawl.domain || '')}`,
      `site_stats_json: ${JSON.stringify(crawl.siteStats || {})}`,
      `homepage_json: ${JSON.stringify(crawl.homepage || {})}`,
      `about_page_json: ${JSON.stringify(crawl.aboutPage || {})}`,
      `product_pages_json: ${JSON.stringify(productPages)}`,
      `contact_page_json: ${JSON.stringify(crawl.contactPage || {})}`,
      `representative_pages_json: ${JSON.stringify(representativePages)}`,
      `representative_text_blocks_json: ${JSON.stringify(representativeTextBlocks)}`,
      `navigation_json: ${JSON.stringify(crawl.navigation || [])}`,
      `policy_pages_json: ${JSON.stringify(crawl.policyPages || [])}`,
      `links_json: ${JSON.stringify(crawl.links || [])}`,
      `top_images_json: ${JSON.stringify(crawl.topImages || [])}`,
      `screenshots_json: ${JSON.stringify(crawl.screenshots || [])}`,
      `branding_json: ${JSON.stringify(crawl.branding || {})}`,
      // NOTE(review): this serializes the entire crawl object again, without the
      // slice() caps applied above, so it repeats every field already listed.
      // Kept as-is to leave the prompt unchanged; confirm whether it is intended.
      `crawl_summary_json: ${JSON.stringify(crawl)}`,
      `content_digest: ${contentDigest}`,
    ].join('\n'),
  });

  const normalizedTone = normalizeText(String(result.tone || '')).toLowerCase();
  return {
    brandName: normalizeText(String(result.brandName || '')) || 'Unknown Brand',
    tone: BRAND_CONTEXT_TONE_OPTIONS.includes(normalizedTone) ? normalizedTone : 'professional',
    taglines: sanitizeStringArray(result.taglines, { maxItems: 3 }),
    colors: sanitizeStringArray(result.colors),
    relevantImageUrls: sanitizeStringArray(result.relevantImageUrls, { maxItems: 5, allowUrlsOnly: true }),
    aboutSummary: normalizeText(String(result.aboutSummary || '')),
  };
}
/**
 * Generate three compliant transactional SMS templates for an order event.
 *
 * Makes up to two LLM generation rounds (asking for 6, then 8 drafts). Each
 * new draft is checked deterministically (structure, blocked brand phrases)
 * and then by validateEditedTemplate; rejection reasons are fed into the next
 * round's prompt.
 *
 * @param {object} [brandContext] - Brand details (brandName, tone, taglines).
 *   null/undefined are treated as an empty context.
 * @param {string} eventSlug - Event key; known slugs get a canned description.
 * @param {string} eventLabel - Human-readable event name.
 * @param {{senderId?: string}} [options] - Registered sender ID, if known.
 * @returns {Promise<string[]>} Exactly three approved templates.
 * @throws {Error} When fewer than three drafts pass, or an LLM request fails.
 */
async function generateTemplates(brandContext = {}, eventSlug, eventLabel, options = {}) {
  // The default parameter only covers `undefined`; a null context previously crashed
  // while building the prompt even though other accesses used optional chaining.
  const brand = brandContext ?? {};

  // Own-property lookup so slugs like "constructor" or "toString" cannot resolve to
  // inherited Object.prototype members and end up in the prompt text.
  const isKnownEvent = Object.prototype.hasOwnProperty.call(EVENT_DESCRIPTIONS, eventSlug);
  const eventDesc = isKnownEvent
    ? EVENT_DESCRIPTIONS[eventSlug]
    : `A "${eventLabel}" event in the order lifecycle`;

  const registeredSenderId = normalizeText(options?.senderId).toUpperCase();
  const brandPhraseOptions = {
    brandName: brand.brandName,
    brandTaglines: brand.taglines,
  };
  const blockedBrandPhrases = getBlockedBrandPhrases(brandPhraseOptions);

  const approvedTemplates = [];
  const seenTemplates = new Set();
  const rejectionReasons = [];

  for (let attempt = 0; attempt < 2 && approvedTemplates.length < 3; attempt += 1) {
    // Ask for more drafts on the retry to raise the odds of reaching three approvals.
    const templateCount = attempt === 0 ? 6 : 8;
    const result = await requestStructuredJson({
      model: TEMPLATE_LLM_MODEL,
      taskName: 'SMS template generation',
      temperature: 0.45,
      systemPrompt: 'You are an expert in Indian transactional SMS templates. Follow the provided constraints exactly, self-check against them, and return only valid JSON.',
      // NOTE: filter(Boolean) drops the optional lines when unused, and also every ''
      // separator entry, so the final prompt contains no blank lines.
      userPrompt: [
        `Generate exactly ${templateCount} distinct transactional SMS templates.`,
        '',
        `Brand: ${String(brand.brandName || '')}`,
        `Tone: ${String(brand.tone || '')}`,
        `Taglines: ${JSON.stringify(Array.isArray(brand.taglines) ? brand.taglines : [])}`,
        `Event slug: ${String(eventSlug || '')}`,
        `Event label: ${String(eventLabel || '')}`,
        `Event description: ${eventDesc}`,
        `Registered sender ID: ${registeredSenderId || 'Not provided. Do not append any brand or sender signature.'}`,
        '',
        `Rules: ${TRAI_RULES_TEXT}`,
        '',
        'Approved placeholder types:',
        describeDltVariableTypes(),
        `- ${LEGACY_DLT_VAR_TOKEN}: Generic fallback for free-form values such as customer names, product names, or addresses when a stricter typed token does not fit.`,
        '',
        'Each template must:',
        '- be under 160 characters',
        '- start with clear event or order context',
        '- match the event accurately',
        '- avoid promotional language',
        '- avoid raw URLs unless clearly required for the event',
        '- never mention the brand name or tagline in the message body unless the exact registered sender ID is explicitly required and provided',
        blockedBrandPhrases.length > 0
          ? `- specifically do not include these phrases: ${blockedBrandPhrases.join(', ')}`
          : '',
        '',
        rejectionReasons.length > 0
          ? `Avoid these issues seen in rejected drafts: ${rejectionReasons.slice(-6).join(' | ')}`
          : '',
        '',
        'Return only valid JSON with exactly this shape:',
        `{ "templates": ["template 1", "template 2", "... up to ${templateCount} templates"] }`,
      ].filter(Boolean).join('\n'),
    });

    const candidateTemplates = sanitizeStringArray(result.templates, { maxItems: templateCount });
    for (const candidate of candidateTemplates) {
      if (approvedTemplates.length >= 3) break;
      // Skip drafts already judged in this or the previous round.
      if (seenTemplates.has(candidate)) continue;
      seenTemplates.add(candidate);

      // Cheap deterministic checks first, so the LLM validator is only called when needed.
      const structureIssue = validateTemplateStructure(candidate);
      if (structureIssue) {
        rejectionReasons.push(structureIssue);
        continue;
      }
      const blockedPhrase = findBlockedBrandPhrase(candidate, brandPhraseOptions);
      if (blockedPhrase) {
        rejectionReasons.push(`Do not mention "${blockedPhrase}" in the SMS body.`);
        continue;
      }

      const validation = await validateEditedTemplate(candidate, {
        senderId: registeredSenderId,
        eventSlug,
        eventLabel,
        brandName: brand.brandName,
        brandTaglines: brand.taglines,
      });
      if (validation.approved) {
        approvedTemplates.push(candidate);
        continue;
      }
      if (validation.why) {
        rejectionReasons.push(validation.why);
      }
    }
  }

  if (approvedTemplates.length < 3) {
    throw new Error('Could not generate 3 compliant templates. Please try again.');
  }
  return approvedTemplates.slice(0, 3);
}
/**
 * Ask the cURL LLM to normalize a provider cURL command and map the approved
 * template's DLT tokens to semantic field names.
 *
 * @param {string} rawCurl - Provider cURL as supplied.
 * @param {string} approvedTemplate - Approved SMS template text.
 * @param {string} eventSlug - Event key, passed to the model for context.
 * @returns {Promise<{processedCurl: string, variableMap: Object<string, string>}>}
 *   The model's rewritten cURL and a sanitized token -> field-name map.
 * @throws {Error} When the LLM request fails or returns unreadable JSON.
 */
async function processCurl(rawCurl, approvedTemplate, eventSlug) {
  const promptLines = [
    'Analyze the provider cURL and return a structured placeholder mapping.',
    '',
    `Approved SMS template:\n${String(approvedTemplate || '')}`,
    '',
    `Event slug: ${String(eventSlug || '')}`,
    '',
    `Raw cURL:\n${String(rawCurl || '')}`,
    '',
    'Instructions:',
    '- identify all placeholder formats in the cURL',
    '- infer semantic field names in camelCase',
    '- normalize placeholders inside processedCurl using those camelCase field names',
    '- build variableMap using the exact DLT token text from the approved template in appearance order',
    `- supported DLT token types include ${SUPPORTED_DLT_TOKENS.join(', ')}`,
    '',
    'Return only valid JSON with exactly this shape:',
    '{',
    ' "processedCurl": "string",',
    ' "variableMap": { "{#numeric#}[0]": "fieldName", "{#var#}[1]": "fieldName" }',
    '}',
  ];

  const analysis = await requestStructuredJson({
    model: CURL_LLM_MODEL,
    taskName: 'Provider cURL processing',
    temperature: 0.1,
    systemPrompt: 'You are an SMS provider integration expert. Analyze raw provider curls, infer semantic placeholders, and return only valid JSON.',
    userPrompt: promptLines.join('\n'),
  });

  const processedCurl = String(analysis.processedCurl || '');
  const variableMap = sanitizeVariableMap(analysis.variableMap);
  return { processedCurl, variableMap };
}
/**
 * Decide whether an SMS template may be approved.
 *
 * Deterministic checks run first (structure, blocked brand phrases) and
 * short-circuit with `source: 'deterministic'`; otherwise the edit-check LLM
 * gives the verdict.
 *
 * @param {string} editedTemplate - Template text to review.
 * @param {{senderId?: string, eventSlug?: string, eventLabel?: string,
 *   brandName?: string, brandTaglines?: string[]}} [options] - Review context.
 * @returns {Promise<{approved: boolean, why: string, workflowResult: object}>}
 *   Verdict, explanation, and the raw result object behind it.
 * @throws {Error} When the LLM request fails or returns unreadable JSON.
 */
async function validateEditedTemplate(editedTemplate, options = {}) {
  // Shared shape for rejections decided without calling the model.
  const rejectWithoutModel = (userReason, internalReason) => ({
    approved: false,
    why: userReason,
    workflowResult: { approved: false, why: internalReason, source: 'deterministic' },
  });

  const structureProblem = validateTemplateStructure(editedTemplate);
  if (structureProblem) {
    return rejectWithoutModel(structureProblem, structureProblem);
  }

  const senderId = normalizeText(options?.senderId).toUpperCase();
  const slug = normalizeText(options?.eventSlug);
  const label = normalizeText(options?.eventLabel);
  const brand = normalizeText(options?.brandName);

  const offendingPhrase = findBlockedBrandPhrase(editedTemplate, options);
  if (offendingPhrase) {
    return rejectWithoutModel(
      `Remove the brand reference "${offendingPhrase}" from the message body.`,
      `Blocked brand phrase: ${offendingPhrase}`,
    );
  }

  const promptLines = [
    'Review this edited SMS template and decide whether it should be approved.',
    '',
    `Template:\n${String(editedTemplate || '')}`,
    '',
    slug ? `Event slug: ${slug}` : '',
    label ? `Event label: ${label}` : '',
    brand ? `Brand name: ${brand}` : '',
    `Registered sender ID: ${senderId || 'Not provided. Reject appended brand or sender signatures.'}`,
    '',
    `Rules: ${TRAI_RULES_TEXT}`,
    '',
    'Approved placeholder types:',
    describeDltVariableTypes(),
    `- ${LEGACY_DLT_VAR_TOKEN}: Generic fallback for free-form values such as names, product names, or addresses when a stricter typed token does not fit.`,
    '',
    'Approval guidance:',
    '- approve only if the template is clear, transactional, and appears compliant with the rules',
    '- approve typed placeholders like {#numeric#}, {#url#}, {#urlott#}, {#cbn#}, {#email#}, and {#alphanumeric#} when they match the intended dynamic value type',
    `- allow ${LEGACY_DLT_VAR_TOKEN} only as a generic fallback for free-form content that does not fit a stricter typed token`,
    '- reject if a more precise typed token should clearly replace a generic one for numeric, URL, callback, email, or alphanumeric values',
    '- reject if the message mentions the brand name, tagline, or a brand-style signoff in the body',
    '- reject if the message appends a sender signature that does not exactly match the registered sender ID',
    '- reject if it is too promotional, malformed, ambiguous, or clearly non-compliant',
    '- keep the explanation concise and actionable',
    '',
    'Return only valid JSON with exactly this shape:',
    '{ "approved": true, "why": "short explanation" }',
  ];

  const verdict = await requestStructuredJson({
    model: EDIT_CHECK_LLM_MODEL,
    taskName: 'Edited template validation',
    temperature: 0,
    systemPrompt: 'You validate Indian transactional SMS templates for compliance and clarity. Return only valid JSON.',
    userPrompt: promptLines.join('\n'),
  });

  // Accept a real boolean, or fall back to a small vocabulary of affirmative strings
  // found in either `approved` or `status`.
  let approved;
  if (typeof verdict.approved === 'boolean') {
    approved = verdict.approved;
  } else {
    const flag = normalizeText(String(verdict.approved || verdict.status || '')).toLowerCase();
    approved = ['approved', 'pass', 'passed', 'valid', 'ok', 'true'].includes(flag);
  }

  const why = normalizeText(String(verdict.why || verdict.reason || verdict.message || ''));
  return { approved, why, workflowResult: verdict };
}
/**
 * Send a provider cURL (base64-encoded) to the field-validation workflow and
 * normalize its answer.
 *
 * @param {string} rawCurl - Provider cURL as supplied; falsy values are sent as ''.
 * @returns {Promise<{isValidCurl: boolean, provider: {providerName: string,
 *   senderId: string, dltEntityId: string, authKey: string}, reason: string}>}
 *   Validation verdict with trimmed provider fields (senderId upper-cased).
 *   Missing fields come back as empty strings.
 * @throws {Error} When the workflow request fails.
 */
async function validateCurlFields(rawCurl) {
  const payload = {
    curl_b64: Buffer.from(String(rawCurl || ''), 'utf8').toString('base64'),
  };
  const data = await postWorkflow(WORKFLOW_VALIDATE_FIELDS, payload);

  // The workflow may answer with a JSON string or an already-decoded body.
  const decoded = typeof data === 'string' ? parseJsonField(data, {}) : data;
  // A body such as "null" or "42" decodes fine but is not a record; reading
  // properties off null would throw, so anything that is not an object is
  // treated as an empty answer.
  const output = decoded !== null && typeof decoded === 'object' ? decoded : {};

  const isValidCurl = output.is_valid_curl === true || String(output.is_valid_curl).toLowerCase() === 'true';
  return {
    isValidCurl,
    provider: {
      providerName: String(output.provider_name || '').trim(),
      senderId: String(output.dlt_sender_id || '').trim().toUpperCase(),
      dltEntityId: String(output.dlt_entity_id || '').trim(),
      authKey: String(output.api_auth_key || '').trim(),
    },
    reason: String(output.reason || '').trim(),
  };
}
// Public API of this module; every other function and constant here stays module-private.
module.exports = {
parseBrandContext,
generateTemplates,
processCurl,
validateEditedTemplate,
validateCurlFields,
};