From 93e0b32454d638bde9a481305ee0e3890489c5fe Mon Sep 17 00:00:00 2001 From: Ritul Date: Fri, 3 Apr 2026 12:14:02 +0530 Subject: [PATCH] Business page visual, view brand buttons, and other stuff --- client/src/App.jsx | 81 ++- client/src/components/BusinessReviewModal.jsx | 354 +++++++++++++ client/src/components/CdnGallery.jsx | 38 ++ .../src/components/RegisterBusinessModal.jsx | 299 ++++++++--- client/src/components/Sidebar.jsx | 46 +- client/src/pages/Brand.jsx | 73 +-- client/src/pages/Businesses.jsx | 470 +++++++++--------- client/src/utils/businessOnboarding.js | 98 ++++ server/routes/businesses.js | 377 +++++++++++--- server/services/crawlSummary.js | 225 +++++++++ server/services/firecrawl.js | 299 +++++++++-- server/services/openai2.js | 46 +- 12 files changed, 1894 insertions(+), 512 deletions(-) create mode 100644 client/src/components/BusinessReviewModal.jsx create mode 100644 client/src/components/CdnGallery.jsx create mode 100644 client/src/utils/businessOnboarding.js create mode 100644 server/services/crawlSummary.js diff --git a/client/src/App.jsx b/client/src/App.jsx index 55ab51a..5a1814d 100644 --- a/client/src/App.jsx +++ b/client/src/App.jsx @@ -1,5 +1,8 @@ +import { useState } from 'react'; import { BrowserRouter, Routes, Route, Navigate, useLocation } from 'react-router-dom'; import { BusinessProvider, useBusiness } from './context/BusinessContext'; +import apiClient from './api/client'; +import BusinessReviewModal from './components/BusinessReviewModal'; import Sidebar from './components/Sidebar'; import Businesses from './pages/Businesses'; import Providers from './pages/Providers'; @@ -9,27 +12,65 @@ import Templates from './pages/Templates'; import { Link } from 'react-router-dom'; function SubLayout({ children }) { - const { activeBusinessId, hasGlobalSms } = useBusiness(); + const { activeBusiness, activeBusinessId, hasGlobalSms } = useBusiness(); + const [reviewBusiness, setReviewBusiness] = useState(null); + const [reviewLoading, setReviewLoading] = useState(false); + const [reviewError, setReviewError] = useState(''); + + async function handleOpenReview() { + if (!activeBusinessId || reviewLoading) return; + + setReviewError(''); + + if (activeBusiness?.scrapeArtifacts?.json) { + setReviewBusiness(activeBusiness); + return; + } + + setReviewLoading(true); + try { + const response = await apiClient.get(`/api/businesses/${activeBusinessId}`); + setReviewBusiness(response.data); + } catch (error) { + setReviewError(error.response?.data?.error || 'Failed to load brand review.'); + } finally { + setReviewLoading(false); + } + } + return ( -
- -
-
- {hasGlobalSms && ( - - - - )} -
-
- {children} -
-
-
+ <> +
+ +
+
+ {hasGlobalSms && ( + + + + )} +
+
+ {children} +
+
+
+ + {reviewBusiness && ( + setReviewBusiness(null)} + /> + )} + ); } diff --git a/client/src/components/BusinessReviewModal.jsx b/client/src/components/BusinessReviewModal.jsx new file mode 100644 index 0000000..83c588c --- /dev/null +++ b/client/src/components/BusinessReviewModal.jsx @@ -0,0 +1,354 @@ +import { useEffect, useMemo } from 'react'; +import CdnGallery from './CdnGallery'; +import { + getBusinessDomain, + getBusinessImage, + getBusinessName, + getBusinessTagline, +} from '../utils/businessProfile'; + +function normalizeText(value) { + return typeof value === 'string' ? value.trim() : ''; +} + +function normalizeUniqueStrings(value) { + if (!Array.isArray(value)) return []; + + const seen = new Set(); + return value + .map((entry) => normalizeText(entry)) + .filter((entry) => { + if (!entry || seen.has(entry)) return false; + seen.add(entry); + return true; + }); +} + +function normalizeColorEntries(value) { + if (!Array.isArray(value)) return []; + + const seen = new Set(); + return value + .map((entry, index) => { + if (typeof entry === 'string') { + const hex = normalizeText(entry); + return hex ? { name: '', hex, key: `${hex}:${index}` } : null; + } + + if (!entry || typeof entry !== 'object') return null; + + const hex = normalizeText(entry.hex || entry.value || entry.color); + if (!hex) return null; + + const name = normalizeText(entry.name || entry.label || entry.role); + return { name, hex, key: `${name}:${hex}` }; + }) + .filter((entry) => { + if (!entry || seen.has(entry.key)) return false; + seen.add(entry.key); + return true; + }); +} + +function extractCdnUrls(business) { + const artifactUrls = business?.scrapeArtifacts?.cdnUrls; + if (Array.isArray(artifactUrls) && artifactUrls.length > 0) { + return normalizeUniqueStrings(artifactUrls); + } + + return normalizeUniqueStrings(business?.relevantImagePaths); +} + +function normalizeScrapeLinks(value) { + if (!Array.isArray(value)) return []; + + const seen = new Set(); + return value + .map((entry) => { + if (typeof entry === 'string') { + const href = normalizeText(entry); + return href ? { href, label: href } : null; + } + + if (!entry || typeof entry !== 'object') return null; + + const href = normalizeText(entry.href || entry.url || entry.link); + if (!href) return null; + + const label = normalizeText(entry.text || entry.title || entry.label || href); + return { href, label }; + }) + .filter((entry) => { + if (!entry || seen.has(entry.href)) return false; + seen.add(entry.href); + return true; + }); +} + +function formatPrettyJson(value) { + if (value == null) return ''; + + if (typeof value === 'string') { + try { + return JSON.stringify(JSON.parse(value), null, 2); + } catch { + return value; + } + } + + try { + return JSON.stringify(value, null, 2); + } catch { + return String(value); + } +} + +function extractColors(business) { + const labeledColors = normalizeColorEntries(business?.scrapeArtifacts?.json?.branding?.labeledColors); + if (labeledColors.length > 0) return labeledColors; + + const directColors = normalizeColorEntries(business?.colors); + if (directColors.length > 0) return directColors; + + const brandingColors = business?.scrapeArtifacts?.json?.branding?.colors; + return normalizeColorEntries(brandingColors); +} + +function extractAboutText(business) { + const directSummary = normalizeText(business?.aboutSummary); + if (directSummary) return directSummary; + + const scrapeJson = business?.scrapeArtifacts?.json; + const aboutExcerpt = normalizeText(scrapeJson?.aboutPage?.excerpt); + if (aboutExcerpt) return aboutExcerpt; + + const representativeAbout = Array.isArray(scrapeJson?.representativeTextBlocks) + ? scrapeJson.representativeTextBlocks.find((block) => normalizeText(block?.pageType) === 'about') + : null; + const representativeAboutText = normalizeText(representativeAbout?.text); + if (representativeAboutText) return representativeAboutText; + + const homepageExcerpt = normalizeText(scrapeJson?.homepage?.excerpt); + if (homepageExcerpt) return homepageExcerpt; + + return normalizeText(scrapeJson?.summaryText); +} + +export default function BusinessReviewModal({ business, onClose }) { + const name = getBusinessName(business); + const domain = getBusinessDomain(business); + const tagline = getBusinessTagline(business); + const image = getBusinessImage(business); + const tone = normalizeText(business?.tone); + const taglines = normalizeUniqueStrings(business?.taglines); + const colors = extractColors(business); + const aboutText = extractAboutText(business); + const cdnUrls = extractCdnUrls(business); + const links = normalizeScrapeLinks(business?.scrapeArtifacts?.links); + const prettyJson = useMemo(() => formatPrettyJson(business?.scrapeArtifacts?.json), [business]); + + useEffect(() => { + const previousBodyOverflow = document.body.style.overflow; + const previousBodyOverscroll = document.body.style.overscrollBehavior; + const previousHtmlOverflow = document.documentElement.style.overflow; + const previousHtmlOverscroll = document.documentElement.style.overscrollBehavior; + + document.body.style.overflow = 'hidden'; + document.body.style.overscrollBehavior = 'none'; + document.documentElement.style.overflow = 'hidden'; + document.documentElement.style.overscrollBehavior = 'none'; + + return () => { + document.body.style.overflow = previousBodyOverflow; + document.body.style.overscrollBehavior = previousBodyOverscroll; + document.documentElement.style.overflow = previousHtmlOverflow; + document.documentElement.style.overscrollBehavior = previousHtmlOverscroll; + }; + }, []); + + return ( +
+
+
+
+

Business review

+

{name}

+

+ {domain + ? `Review the captured storefront context for ${domain}.` + : 'Review the captured storefront context before moving on.'} +

+
+ +
+ +
+
+
+
+
+ {image ? ( + {name} + ) : ( + {name?.[0]?.toUpperCase() || 'B'} + )} +
+
+

{name}

+ {domain &&

{domain}

} + {tagline &&

{tagline}

} +
+ {tone && ( + + Tone: {tone} + + )} + + {cdnUrls.length} image{cdnUrls.length === 1 ? '' : 's'} + + + {links.length} link{links.length === 1 ? '' : 's'} + + + {colors.length} color{colors.length === 1 ? '' : 's'} + +
+
+
+
+ + {aboutText && ( +
+
+

About Company

+

A concise summary of what the brand is about, what it sells, and its overall vibe.

+
+
+
+

{aboutText}

+
+
+
+ )} + + {(taglines.length > 0 || colors.length > 0) && ( +
+ {taglines.length > 0 && ( +
+
+

Taglines

+

Short brand lines captured during onboarding.

+
+
+
+ {taglines.map((entry, index) => ( +

"{entry}"

+ ))} +
+
+
+ )} + + {colors.length > 0 && ( +
+
+

Color Codes

+

Detected brand colors used across the storefront.

+
+
+
+ {colors.map((color) => ( +
+ +
+ {color.name && ( + + {color.name} + + )} + {color.hex} +
+
+ ))} +
+
+
+ )} +
+ )} + + {cdnUrls.length > 0 && ( +
+
+

Images

+

Captured storefront images are available below.

+
+ +
+ )} + + {prettyJson && ( +
+
+

Captured Data

+

Raw storefront data captured during onboarding.

+
+
+
+                    {prettyJson}
+                  
+
+
+ )} + + {links.length > 0 && ( +
+
+

Links

+

Every discovered storefront link is available below.

+
+
+
+ {links.map((link, index) => ( + +

{link.label}

+

{link.href}

+
+ ))} +
+
+
+ )} +
+
+ +
+ +
+
+
+ ); +} diff --git a/client/src/components/CdnGallery.jsx b/client/src/components/CdnGallery.jsx new file mode 100644 index 0000000..c77e3b9 --- /dev/null +++ b/client/src/components/CdnGallery.jsx @@ -0,0 +1,38 @@ +export default function CdnGallery({ urls, compact = false, showLabels = true, clickable = true }) { + if (!urls.length) return null; + + return ( +
+ {urls.map((url, index) => { + const Wrapper = clickable ? 'a' : 'div'; + const wrapperProps = clickable + ? { href: url, target: '_blank', rel: 'noreferrer' } + : {}; + + return ( + +
+ {`Storefront { + event.currentTarget.style.opacity = '0.35'; + }} + /> +
+ {showLabels && ( +
+

{url}

+
+ )} +
+ ); + })} +
+ ); +} diff --git a/client/src/components/RegisterBusinessModal.jsx b/client/src/components/RegisterBusinessModal.jsx index e488935..3ba5c13 100644 --- a/client/src/components/RegisterBusinessModal.jsx +++ b/client/src/components/RegisterBusinessModal.jsx @@ -1,45 +1,130 @@ -import { useState } from 'react'; -import apiClient from '../api/client'; +import { useEffect, useRef, useState } from 'react'; +import { + fetchBusinessOnboardingJob, + getBusinessOnboardingError, + getBusinessOnboardingProgress, + getBusinessOnboardingStageMeta, + shouldRetryMissingBusinessOnboardingJob, + startBusinessOnboardingJob, +} from '../utils/businessOnboarding'; -export default function RegisterBusinessModal({ onClose, onSuccess }) { +export default function RegisterBusinessModal({ onClose, onSuccess, onJobStarted, showCompletionScreen = true }) { const [url, setUrl] = useState(''); const [status, setStatus] = useState('idle'); const [error, setError] = useState(''); + const [job, setJob] = useState(null); + const pollTimerRef = useRef(null); + const cancelledRef = useRef(false); + + useEffect(() => { + return () => { + cancelledRef.current = true; + if (pollTimerRef.current) window.clearTimeout(pollTimerRef.current); + }; + }, []); + + function clearPolling() { + if (pollTimerRef.current) { + window.clearTimeout(pollTimerRef.current); + pollTimerRef.current = null; + } + } + + function schedulePoll(initialJob) { + clearPolling(); + let transientNotFoundMisses = 0; + let currentJob = initialJob; + + const tick = async () => { + try { + const nextJob = await fetchBusinessOnboardingJob(initialJob.jobId); + if (cancelledRef.current) return; + + transientNotFoundMisses = 0; + currentJob = nextJob; + setJob(nextJob); + + if (nextJob.status === 'completed') { + clearPolling(); + + if (typeof onSuccess === 'function' && nextJob.business) { + await Promise.resolve(onSuccess(nextJob.business)); + } + + if (showCompletionScreen && nextJob.business) { + setStatus('success'); + } + return; + } + + if (nextJob.status === 'failed') { + clearPolling(); + setStatus('error'); + return; + } + + pollTimerRef.current = window.setTimeout(tick, 2200); + } catch (err) { + if (cancelledRef.current) return; + + if (shouldRetryMissingBusinessOnboardingJob(currentJob, err, transientNotFoundMisses)) { + transientNotFoundMisses += 1; + pollTimerRef.current = window.setTimeout(tick, 1800); + return; + } + + clearPolling(); + setStatus('error'); + setError(err.response?.data?.error || err.message || 'Failed to fetch onboarding progress.'); + } + }; + + pollTimerRef.current = window.setTimeout(tick, 2500); + } async function handleSubmit(e) { e.preventDefault(); - if (!url.trim()) return; + const normalizedUrl = url.trim(); + if (!normalizedUrl) return; - setStatus('loading'); + setStatus('starting'); setError(''); try { - const res = await apiClient.post('/api/businesses', { - websiteUrl: url.trim(), + const startedJob = await startBusinessOnboardingJob({ + websiteUrl: normalizedUrl, }); - if (typeof onSuccess === 'function') { - await onSuccess(res.data); + if (typeof onJobStarted === 'function') { + await Promise.resolve(onJobStarted(startedJob)); return; } - setStatus('success'); + setJob(startedJob); + setStatus('polling'); + schedulePoll(startedJob); } catch (err) { - setError(err.response?.data?.error || 'Something went wrong. Please try again.'); + setError(err.response?.data?.error || err.message || 'Something went wrong. Please try again.'); setStatus('error'); } } + const stageMeta = getBusinessOnboardingStageMeta(job?.stage || status); + const progress = getBusinessOnboardingProgress(job || {}); + const isFailed = status === 'error' && !!job; + const hasStarted = !!job; + const showSuccessScreen = status === 'success' && showCompletionScreen; + const errorMessage = getBusinessOnboardingError(job) || error; + return (
-
- - {status === 'success' && ( +
+ {showSuccessScreen && (

Business created

-

Storefront captured successfully

+

{job?.business?.brandName || 'Storefront captured successfully'}

- The business has been created and the scraped storefront details are ready for review. + The onboarding job finished successfully and the business is ready for review.

Website URL

@@ -47,69 +132,143 @@ export default function RegisterBusinessModal({ onClose, onSuccess }) {
)} - {(status === 'idle' || status === 'loading' || status === 'error') && ( - <> -
-

Add a Business

-

- Enter the storefront website URL and we'll scrape it to detect the brand and set up your business. + {isFailed && ( +

+
+

Onboarding failed

+

We could not finish creating this business

+

+ {errorMessage || 'The onboarding job stopped before the business could be created.'}

- -
-
- - setUrl(e.target.value)} - placeholder="https://yourstore.com" - disabled={status === 'loading'} - className="w-full px-4 py-2 rounded-lg bg-white border border-gray-300 text-gray-800 placeholder-gray-400 font-medium focus:outline-none focus:ring-2 focus:ring-primary-blue focus:border-transparent transition disabled:opacity-50 text-sm " - required - /> -
- - {status === 'error' && ( -

{error}

- )} - -
- - -
- - {status === 'loading' && ( -

- Fetching the website context and extracting brand details. This may take 20–30 seconds. -

- )} -
- + +
)} + {!showSuccessScreen && !isFailed && ( + <> + {!hasStarted && ( + <> +
+

Add a Business

+

+ Enter the storefront website URL and we'll scrape the homepage, about page, and representative product pages to detect the brand and set up your business. +

+
+ +
+
+ + setUrl(e.target.value)} + placeholder="https://yourstore.com" + disabled={status === 'starting'} + className="w-full px-4 py-2 rounded-lg bg-white border border-gray-300 text-gray-800 placeholder-gray-400 font-medium focus:outline-none focus:ring-2 focus:ring-primary-blue focus:border-transparent transition disabled:opacity-50 text-sm" + required + /> +
+ + {status === 'error' && !job && ( +

+ {error} +

+ )} + +
+ + +
+
+ + )} + + {hasStarted && !showSuccessScreen && ( +
+
+

+ {status === 'error' ? 'Onboarding failed' : 'Onboarding in progress'} +

+

{stageMeta.label}

+

+ {status === 'error' + ? errorMessage || 'The onboarding job failed.' + : stageMeta.note} +

+
+ +
+
+
+
+
+ Status: {job?.status || status} + {progress.pagesDiscovered > 0 ? ( + {progress.pagesProcessed} / {progress.pagesDiscovered} pages + ) : ( + Preparing crawl + )} +
+
+ +
+
+

Pages

+

{progress.pagesProcessed}

+
+
+

Links

+

{progress.linkCount}

+
+
+

Images

+

{progress.imageCount}

+
+
+ + {status === 'error' && ( + + )} +
+ )} + + )}
); diff --git a/client/src/components/Sidebar.jsx b/client/src/components/Sidebar.jsx index b4bf56c..ffe96f5 100644 --- a/client/src/components/Sidebar.jsx +++ b/client/src/components/Sidebar.jsx @@ -59,7 +59,7 @@ function StageMarker({ done, active, enabled }) { return ; } -export default function Sidebar() { +export default function Sidebar({ onOpenReview, reviewLoading = false, reviewError = '' }) { const { activeBusiness, activeBusinessId, @@ -132,13 +132,43 @@ export default function Sidebar() { Switch Business {activeBusiness && ( -
-
- {activeBusiness.brandName?.[0]?.toUpperCase() || 'B'} -
-
-

{activeBusiness.brandName}

-

{activeBusiness.domain}

+
+
+
+ {activeBusiness.brandName?.[0]?.toUpperCase() || 'B'} +
+
+

{activeBusiness.brandName}

+

{activeBusiness.domain}

+ {activeBusinessId && ( + <> +
+ +
+ {reviewError && ( +

{reviewError}

+ )} + + )} +
)} diff --git a/client/src/pages/Brand.jsx b/client/src/pages/Brand.jsx index cb09c97..dea769e 100644 --- a/client/src/pages/Brand.jsx +++ b/client/src/pages/Brand.jsx @@ -1,6 +1,7 @@ import { useState } from 'react'; import { Link } from 'react-router-dom'; import { useBrand } from '../context/BrandContext'; +import BusinessReviewModal from '../components/BusinessReviewModal'; import RegisterBusinessModal from '../components/RegisterBusinessModal'; import apiClient from '../api/client'; @@ -45,6 +46,7 @@ function DeleteConfirmModal({ brandName, onCancel, onConfirm, deleting }) { export default function Brand() { const { brand, loading, refetch } = useBrand(); const [showModal, setShowModal] = useState(false); + const [showReviewModal, setShowReviewModal] = useState(false); const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); const [deleting, setDeleting] = useState(false); const [deleteError, setDeleteError] = useState(''); @@ -122,60 +124,8 @@ export default function Brand() {
- {/* Taglines */} - {brand.taglines?.length > 0 && ( -
-

Taglines

-
- {brand.taglines.map((t, i) => ( -

"{t}"

- ))} -
-
- )} - - {/* Colors */} - {brand.colors?.length > 0 && ( -
-

Brand Colors

-
- {brand.colors.map((c, i) => ( -
-
- {c} -
- ))} -
-
- )}
- {/* Brand images */} - {brand.relevantImagePaths?.length > 0 && ( -
-

Brand Images

-
- {brand.relevantImagePaths.map((url, i) => ( -
- {`brand { e.target.style.opacity = '0.3'; }} - /> -
-

{url}

-
-
- ))} -
-
- )} - {/* Navigation cards */}
{NAV_CARDS.map(card => ( @@ -196,6 +146,13 @@ export default function Brand() { )}
+ + {showDeleteConfirm && ( )} + + {showReviewModal && ( + setShowReviewModal(false)} + /> + )}
); } diff --git a/client/src/pages/Businesses.jsx b/client/src/pages/Businesses.jsx index acb314d..ce696a2 100644 --- a/client/src/pages/Businesses.jsx +++ b/client/src/pages/Businesses.jsx @@ -3,7 +3,16 @@ import { useNavigate } from 'react-router-dom'; import apiClient from '../api/client'; import { useBusiness } from '../context/BusinessContext'; import RegisterBusinessModal from '../components/RegisterBusinessModal'; +import BusinessReviewModal from '../components/BusinessReviewModal'; import { fetchActiveSalesChannels } from '../utils/fyndSalesChannels'; +import { + fetchBusinessOnboardingJob, + getBusinessOnboardingError, + getBusinessOnboardingProgress, + getBusinessOnboardingStageMeta, + shouldRetryMissingBusinessOnboardingJob, + startBusinessOnboardingJob, +} from '../utils/businessOnboarding'; import { getApplicationId, getBusinessDomain, @@ -16,106 +25,6 @@ function normalizeText(value) { return typeof value === 'string' ? value.trim() : ''; } -function normalizeUniqueStrings(value) { - if (!Array.isArray(value)) return []; - - const seen = new Set(); - return value - .map((entry) => normalizeText(entry)) - .filter((entry) => { - if (!entry || seen.has(entry)) return false; - seen.add(entry); - return true; - }); -} - -function extractCdnUrls(business) { - return normalizeUniqueStrings(business?.relevantImagePaths); -} - -function normalizeScrapeLinks(value) { - if (!Array.isArray(value)) return []; - - const seen = new Set(); - return value - .map((entry) => { - if (typeof entry === 'string') { - const href = normalizeText(entry); - return href ? { href, label: href } : null; - } - - if (!entry || typeof entry !== 'object') return null; - - const href = normalizeText(entry.href || entry.url || entry.link); - if (!href) return null; - - const label = normalizeText(entry.text || entry.title || entry.label || href); - return { href, label }; - }) - .filter((entry) => { - if (!entry || seen.has(entry.href)) return false; - seen.add(entry.href); - return true; - }); -} - -function formatPrettyJson(value) { - if (value == null) return ''; - - if (typeof value === 'string') { - try { - return JSON.stringify(JSON.parse(value), null, 2); - } catch { - return value; - } - } - - try { - return JSON.stringify(value, null, 2); - } catch { - return String(value); - } -} - -function CdnGallery({ urls, compact = false, showLabels = true, clickable = true }) { - if (!urls.length) return null; - - return ( -
- {urls.map((url, index) => { - const Wrapper = clickable ? 'a' : 'div'; - const wrapperProps = clickable - ? { href: url, target: '_blank', rel: 'noreferrer' } - : {}; - - return ( - -
- {`Storefront { - event.currentTarget.style.opacity = '0.35'; - }} - /> -
- {showLabels && ( -
-

{url}

-
- )} -
- ); - })} -
- ); -} - function DeleteConfirmModal({ businessName, onCancel, onConfirm, deleting }) { return (
@@ -148,143 +57,94 @@ function DeleteConfirmModal({ businessName, onCancel, onConfirm, deleting }) { ); } -function BusinessCreatedModal({ business, onClose }) { - const name = getBusinessName(business); - const domain = getBusinessDomain(business); - const tagline = getBusinessTagline(business); - const image = getBusinessImage(business); - const cdnUrls = extractCdnUrls(business?.scrapeArtifacts?.cdnUrls?.length ? { relevantImagePaths: business.scrapeArtifacts.cdnUrls } : business); - const links = normalizeScrapeLinks(business?.scrapeArtifacts?.links); - const prettyJson = useMemo(() => formatPrettyJson(business?.scrapeArtifacts?.json), [business]); - - useEffect(() => { - const previousBodyOverflow = document.body.style.overflow; - const previousBodyOverscroll = document.body.style.overscrollBehavior; - const previousHtmlOverflow = document.documentElement.style.overflow; - const previousHtmlOverscroll = document.documentElement.style.overscrollBehavior; - - document.body.style.overflow = 'hidden'; - document.body.style.overscrollBehavior = 'none'; - document.documentElement.style.overflow = 'hidden'; - document.documentElement.style.overscrollBehavior = 'none'; - - return () => { - document.body.style.overflow = previousBodyOverflow; - document.body.style.overscrollBehavior = previousBodyOverscroll; - document.documentElement.style.overflow = previousHtmlOverflow; - document.documentElement.style.overscrollBehavior = previousHtmlOverscroll; - }; - }, []); +function BusinessOnboardingModal({ job, onClose }) { + const status = normalizeText(job?.status); + const stageMeta = getBusinessOnboardingStageMeta(job?.stage || status); + const progress = getBusinessOnboardingProgress(job); + const isFailed = status === 'failed'; + const isCompleted = status === 'completed'; + const discoveredPages = progress.pagesDiscovered; + const processedPages = progress.pagesProcessed; + const progressWidth = stageMeta.percent; + const errorMessage = getBusinessOnboardingError(job); return (
-
-
-
-

Business created

-

{name}

-

- {domain - ? `Scrape completed for ${domain}. Review the captured assets below before moving on.` - : 'Scrape completed. Review the captured assets below before moving on.'} -

-
- +
+
+

+ {isFailed ? 'Onboarding failed' : isCompleted ? 'Business ready' : 'Setting up business'} +

+

{stageMeta.label}

+

+ {isFailed + ? 'The onboarding job could not be completed. You can close this dialog and try again.' + : isCompleted + ? 'The storefront crawl and brand analysis finished successfully.' + : stageMeta.note} +

-
-
-
-
-
- {image ? ( - {name} +
+ {!isFailed && ( + <> +
+
+
+
+
+ Status: {status || 'pending'} + {discoveredPages > 0 ? ( + {processedPages} / {discoveredPages} pages ) : ( - {name?.[0]?.toUpperCase() || 'B'} + Preparing crawl )}
-
-

{name}

- {domain &&

{domain}

} - {tagline &&

{tagline}

} -
- - {cdnUrls.length} image{cdnUrls.length === 1 ? '' : 's'} - - - {links.length} link{links.length === 1 ? '' : 's'} - -
+
+ +
+
+

Pages

+

{processedPages}

+
+
+

Links

+

{progress.linkCount}

+
+
+

Images

+

{progress.imageCount}

+ + )} + + {isFailed && ( +
+ {errorMessage || 'Business onboarding failed.'}
- - {cdnUrls.length > 0 && ( -
-
-

Images

-

Captured storefront images are available below.

-
- -
)} - - {prettyJson && ( -
-
-

Captured Data

-

Raw storefront data captured during onboarding.

-
-
-
-                  {prettyJson}
-                
-
-
- )} - - {links.length > 0 && ( -
-
-

Links

-

Every discovered storefront link is available below.

-
-
-
- {links.map((link, index) => ( - -

{link.label}

-

{link.href}

-
- ))} -
-
-
- )} -
-
- +
+ {isFailed ? ( + + ) : ( + + )}
@@ -314,10 +174,12 @@ function UnifiedBusinessCard({ item, selectingBusinessId, creatingSalesChannelId, + reviewLoadingBusinessId, onSelect, onImport, onDelete, onFallback, + onReview, }) { const entity = item.business || item.channel; const businessId = item.business?.businessId || ''; @@ -327,9 +189,9 @@ function UnifiedBusinessCard({ const domain = getBusinessDomain(entity); const tagline = getBusinessTagline(entity); const isScraped = item.status === 'scraped'; - const cdnUrls = extractCdnUrls(item.business); const isOpening = isScraped && selectingBusinessId === businessId; const isImporting = !isScraped && creatingSalesChannelId === channelId; + const isLoadingReview = isScraped && reviewLoadingBusinessId === businessId; const hasWebsiteUrl = Boolean(item.channel?.websiteUrl); const canOpenBusiness = isScraped && item.business && !isOpening; @@ -393,17 +255,6 @@ function UnifiedBusinessCard({

)} - {isScraped && cdnUrls.length > 0 && ( -
-
-

Images

- - {cdnUrls.length} image{cdnUrls.length === 1 ? '' : 's'} - -
- -
- )}
@@ -418,6 +269,16 @@ function UnifiedBusinessCard({ > Delete + ) : ( <> @@ -432,10 +293,10 @@ function UnifiedBusinessCard({ disabled={isImporting} className="text-sm text-primary-blue font-semibold group-hover:underline disabled:opacity-60" > - {isImporting ? 'Scraping…' : hasWebsiteUrl ? 'Scrape →' : 'Use Fallback URL →'} + {isImporting ? 'Onboarding…' : hasWebsiteUrl ? 'Start onboarding →' : 'Use fallback URL →'} - {hasWebsiteUrl ? 'Ready to scrape' : 'Needs manual URL'} + {hasWebsiteUrl ? 'Ready to onboard' : 'Needs manual URL'} )} @@ -455,10 +316,15 @@ export default function Businesses() { const [selectingBusinessId, setSelectingBusinessId] = useState(''); const [creatingSalesChannelId, setCreatingSalesChannelId] = useState(''); const [createdBusiness, setCreatedBusiness] = useState(null); + const [onboardingJob, setOnboardingJob] = useState(null); + const [showOnboardingModal, setShowOnboardingModal] = useState(false); const [showModal, setShowModal] = useState(false); const [deleteTarget, setDeleteTarget] = useState(null); const [deleting, setDeleting] = useState(false); const [error, setError] = useState(''); + const [reviewBusiness, setReviewBusiness] = useState(null); + const [reviewLoadingBusinessId, setReviewLoadingBusinessId] = useState(''); + const onboardingJobCreatedAt = onboardingJob?.createdAt; const showUnifiedSalesChannelView = salesChannelsStatus === 'success'; @@ -557,8 +423,9 @@ export default function Businesses() { useEffect(() => { load(); }, [load]); - async function handleBusinessCreated(created) { + const handleBusinessCreated = useCallback(async (created) => { setShowModal(false); + setShowOnboardingModal(false); setCreatedBusiness(created); try { @@ -567,7 +434,73 @@ export default function Businesses() { } catch (err) { setError(err.response?.data?.error || 'Business was created, but the business list could not be refreshed.'); } - } + }, [loadBusinesses, loadSalesChannels]); + + const handleBusinessJobStarted = useCallback(async (job) => { + setError(''); + setShowModal(false); + setCreatedBusiness(null); + setOnboardingJob(job); + setShowOnboardingModal(true); + }, []); + + useEffect(() => { + if (!onboardingJob?.jobId) return undefined; + if (onboardingJob.status === 'completed') return undefined; + + let cancelled = false; + let timeoutId = null; + let transientNotFoundMisses = 0; + + async function pollJob() { + try { + const nextJob = await fetchBusinessOnboardingJob(onboardingJob.jobId); + if (cancelled) return; + + transientNotFoundMisses = 0; + setOnboardingJob(nextJob); + + if (nextJob.status === 'completed' && nextJob.business) { + await handleBusinessCreated(nextJob.business); + if (!cancelled) { + setOnboardingJob(null); + setShowOnboardingModal(false); + } + return; + } + + if (nextJob.status === 'failed') { + setShowOnboardingModal(true); + return; + } + + timeoutId = window.setTimeout(pollJob, 2200); + } catch (err) { + if (cancelled) return; + + if (shouldRetryMissingBusinessOnboardingJob({ createdAt: onboardingJobCreatedAt }, err, transientNotFoundMisses)) { + transientNotFoundMisses += 1; + timeoutId = window.setTimeout(pollJob, 1800); + return; + } + + setOnboardingJob((current) => ({ + ...(current || {}), + status: 'failed', + stage: 'failed', + error: { message: err.response?.data?.error || 'Failed to fetch onboarding progress.' }, + })); + setShowOnboardingModal(true); + } + } + + timeoutId = window.setTimeout(pollJob, 2500); + + return () => { + cancelled = true; + if (timeoutId) window.clearTimeout(timeoutId); + }; + }, [handleBusinessCreated, onboardingJob?.jobId, onboardingJob?.status, onboardingJobCreatedAt]); async function handleSelect(biz) { setSelectingBusinessId(biz.businessId); @@ -595,11 +528,11 @@ export default function Businesses() { setError(''); try { - const res = await apiClient.post('/api/businesses', { + const job = await startBusinessOnboardingJob({ applicationId, websiteUrl: channel.websiteUrl, }); - await handleBusinessCreated(res.data); + await handleBusinessJobStarted(job); } catch (err) { setError(err.response?.data?.error || 'Failed to add business from sales channel'); } finally { @@ -621,6 +554,27 @@ export default function Businesses() { } } + async function handleOpenReview(business) { + if (!business?.businessId || reviewLoadingBusinessId) return; + + setReviewLoadingBusinessId(business.businessId); + setError(''); + + try { + if (business?.scrapeArtifacts?.json) { + setReviewBusiness(business); + return; + } + + const response = await apiClient.get(`/api/businesses/${business.businessId}`); + setReviewBusiness(response.data); + } catch (err) { + setError(err.response?.data?.error || 'Failed to load brand review.'); + } finally { + setReviewLoadingBusinessId(''); + } + } + if (loading) { return (
@@ -639,8 +593,8 @@ export default function Businesses() {

{showUnifiedSalesChannelView - ? 'View every connected sales channel in one place and scrape the ones that are not onboarded yet.' - : 'Add a storefront URL and we’ll scrape it to set up your business.'} + ? 'View every connected sales channel in one place and onboard the ones that are not scraped yet.' + : 'Add a storefront URL and we’ll scrape the homepage, about page, and representative product pages to set up your business.'}

{!showUnifiedSalesChannelView && ( @@ -687,10 +641,12 @@ export default function Businesses() { item={item} selectingBusinessId={selectingBusinessId} creatingSalesChannelId={creatingSalesChannelId} + reviewLoadingBusinessId={reviewLoadingBusinessId} onSelect={handleSelect} onImport={handleCreateFromSalesChannel} onDelete={setDeleteTarget} onFallback={() => setShowModal(true)} + onReview={handleOpenReview} /> ))}
@@ -730,10 +686,12 @@ export default function Businesses() { item={{ key: `fallback:${biz.businessId}`, status: 'scraped', business: biz, channel: null }} selectingBusinessId={selectingBusinessId} creatingSalesChannelId={creatingSalesChannelId} + reviewLoadingBusinessId={reviewLoadingBusinessId} onSelect={handleSelect} onImport={handleCreateFromSalesChannel} onDelete={setDeleteTarget} onFallback={() => setShowModal(true)} + onReview={handleOpenReview} /> ))}
@@ -750,10 +708,32 @@ export default function Businesses() { {showModal && ( { setShowModal(false); load(); }} - onSuccess={handleBusinessCreated} + onJobStarted={handleBusinessJobStarted} + /> + )} + {onboardingJob && showOnboardingModal && ( + setShowOnboardingModal(false)} + /> + )} + {createdBusiness && ( + setCreatedBusiness(null)} + /> + )} + {reviewBusiness && ( + setReviewBusiness(null)} /> )} - {createdBusiness && setCreatedBusiness(null)} />} {deleteTarget && ( entry.key === normalizedStage) || STAGE_SEQUENCE[0]; + const index = STAGE_SEQUENCE.findIndex((entry) => entry.key === found.key); + const percent = normalizedStage === 'completed' + ? 100 + : normalizedStage === 'failed' + ? 100 + : Math.max(8, Math.min(92, index <= 0 ? 12 : 12 + (index * 20))); + + return { + stage: found.key, + label: found.label, + note: found.note, + index, + percent, + totalStages: STAGE_SEQUENCE.length - 2, + }; +} + +export function getBusinessOnboardingProgress(job = {}) { + const progress = job?.progress && typeof job.progress === 'object' ? job.progress : {}; + return { + pagesProcessed: Number(progress.pagesProcessed || 0), + pagesDiscovered: Number(progress.pagesDiscovered || 0), + creditsUsed: Number(progress.creditsUsed || 0), + representativePages: Number(progress.representativePages || 0), + imageCount: Number(progress.imageCount || 0), + linkCount: Number(progress.linkCount || 0), + }; +} + +export function getBusinessOnboardingError(job = {}) { + const error = job?.error; + if (!error) return ''; + if (typeof error === 'string') return error; + if (typeof error === 'object') { + return String(error.message || error.error || error.details || '').trim(); + } + + return String(error).trim(); +} + +export function isRetryableOnboardingJobLookupError(error) { + if (error?.response?.status !== 404) return false; + + const message = String(error?.response?.data?.error || error?.message || '').trim(); + return /onboarding job not found/i.test(message); +} + +export function shouldRetryOnboardingJobLookup(error, missCount = 0) { + return isRetryableOnboardingJobLookupError(error) && missCount < JOB_LOOKUP_RETRY_LIMIT; +} + +export function getOnboardingJobRetryDelay(missCount = 0) { + return Math.min(4500, 1500 + (Math.max(0, missCount) * 700)); +} + +export function shouldRetryMissingBusinessOnboardingJob(job = {}, error, missCount = 0) { + if (error?.response?.status !== 404) return false; + if (missCount >= MAX_RETRYABLE_JOB_NOT_FOUND_MISSES) return false; + + const createdAtMs = Date.parse(String(job?.createdAt || '').trim()); + if (!Number.isFinite(createdAtMs)) return false; + + return Date.now() - createdAtMs <= RETRYABLE_JOB_NOT_FOUND_WINDOW_MS; +} diff --git a/server/routes/businesses.js b/server/routes/businesses.js index ab9154e..78f46a3 100644 --- a/server/routes/businesses.js +++ b/server/routes/businesses.js @@ -1,9 +1,10 @@ const express = require('express'); const router = express.Router(); const { v4: uuidv4 } = require('uuid'); -const { scrape } = require('../services/firecrawl'); +const { buildBrandContextPlan, collectBrandContextPages } = require('../services/firecrawl'); const { parseBrandContext, generateTemplates, processCurl, validateCurlFields } = require('../services/openai2'); const { sendViaWorkflow } = require('../services/workflowSender'); +const { buildCrawlSummary } = require('../services/crawlSummary'); const { uploadJSON, fetchJSON, @@ -235,6 +236,249 @@ function mergeBusinessSummary(baseBusiness = {}, context = null) { }; } +function onboardingJobsRoot(companyId) { + return `${companyId}/jobs`; +} + +function buildScrapeArtifacts(crawlSummary, imagePaths = []) { + return { + cdnUrls: normalizeUrlList(imagePaths), + links: Array.isArray(crawlSummary?.links) ? crawlSummary.links : [], + json: crawlSummary && typeof crawlSummary === 'object' ? crawlSummary : {}, + }; +} + +function extractAboutSummary(crawlSummary = {}) { + return normalizeText( + crawlSummary?.aboutPage?.excerpt + || crawlSummary?.aboutPage?.description + || crawlSummary?.homepage?.description + || crawlSummary?.homepage?.excerpt + || '' + ); +} + +function buildJobResponse(job) { + return { + jobId: normalizeText(job?.jobId), + status: normalizeText(job?.status), + stage: normalizeText(job?.stage), + companyId: normalizeScopeId(job?.companyId), + applicationId: normalizeScopeId(job?.applicationId), + websiteUrl: normalizeWebsiteUrl(job?.websiteUrl), + progress: job?.progress && typeof job.progress === 'object' ? job.progress : {}, + business: job?.business && typeof job.business === 'object' ? job.business : null, + error: job?.error && typeof job.error === 'object' ? job.error : null, + createdAt: normalizeText(job?.createdAt), + updatedAt: normalizeText(job?.updatedAt), + }; +} + +function wait(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +async function loadOnboardingJob(companyId, jobId) { + return fetchJSON(onboardingJobsRoot(companyId), jobId); +} + +async function loadOnboardingJobWithRetry(companyId, jobId, options = {}) { + const attempts = Number.isFinite(options.attempts) ? options.attempts : 6; + const delayMs = Number.isFinite(options.delayMs) ? options.delayMs : 350; + + for (let attempt = 0; attempt < attempts; attempt += 1) { + const job = await loadOnboardingJob(companyId, jobId); + if (job) return job; + + if (attempt < attempts - 1) { + await wait(delayMs); + } + } + + return null; +} + +async function saveOnboardingJob(job) { + const normalizedJob = { + ...job, + updatedAt: new Date().toISOString(), + }; + await uploadJSON(onboardingJobsRoot(normalizedJob.companyId), normalizedJob.jobId, normalizedJob); + return normalizedJob; +} + +async function finalizeBusinessFromCrawlJob(job, crawlSummary, brandContext) { + const merchantId = job.companyId; + const applicationId = normalizeScopeId(job.applicationId); + const websiteUrl = normalizeWebsiteUrl(job.websiteUrl); + + if (applicationId) { + const existingBusiness = await findBusinessByApplicationId(merchantId, applicationId); + if (existingBusiness) { + const existingContext = await fetchJSON(businessRoot(merchantId, existingBusiness.businessId), 'context').catch(() => null); + const mergedBusiness = existingContext ? { ...existingContext } : mergeBusinessSummary(existingBusiness); + return { + business: { + ...mergedBusiness, + scrapeArtifacts: buildScrapeArtifacts(crawlSummary, mergedBusiness.relevantImagePaths), + }, + reusedExistingBusiness: true, + }; + } + } + + const businesses = await getIndex(merchantId); + const businessId = uuidv4(); + const bizRoot = businessRoot(merchantId, businessId); + const imagesFolder = `${bizRoot}/images`; + const imagePaths = []; + const imageCandidates = normalizeUrlList(brandContext?.relevantImageUrls); + + for (let i = 0; i < Math.min(imageCandidates.length, 6); i += 1) { + const uploaded = await uploadImageFromUrl(imageCandidates[i], imagesFolder, `image_${i + 1}`); + if (uploaded) imagePaths.push(uploaded); + } + + let domain = normalizeText(crawlSummary?.domain); + if (!domain) { + try { + domain = new URL(websiteUrl).hostname; + } catch { + domain = ''; + } + } + + const now = new Date().toISOString(); + const contextJson = { + businessId, + merchantId, + companyId: merchantId, + applicationId, + domain, + brandName: brandContext.brandName || 'Unknown Brand', + tone: brandContext.tone || 'professional', + taglines: Array.isArray(brandContext.taglines) ? brandContext.taglines : [], + colors: Array.isArray(brandContext.colors) ? brandContext.colors : [], + relevantImagePaths: imagePaths, + aboutSummary: normalizeText(brandContext.aboutSummary) || extractAboutSummary(crawlSummary), + websiteUrl, + crawlStats: crawlSummary?.siteStats || {}, + createdAt: now, + updatedAt: now, + }; + + await uploadJSON(bizRoot, 'context', contextJson); + await uploadJSON(bizRoot, 'crawl_summary', crawlSummary); + await uploadJSON(bizRoot, 'events', { events: DEFAULT_EVENTS }); + + const previewSummary = getBusinessPreviewSummary(contextJson); + businesses.push({ + businessId, + companyId: merchantId, + applicationId, + brandName: contextJson.brandName, + domain: contextJson.domain, + previewTagline: previewSummary.previewTagline, + previewImagePath: previewSummary.previewImagePath, + relevantImagePaths: normalizeUrlList(contextJson.relevantImagePaths), + createdAt: contextJson.createdAt, + updatedAt: contextJson.updatedAt, + }); + await saveIndex(merchantId, businesses); + + return { + business: { + ...contextJson, + scrapeArtifacts: buildScrapeArtifacts(crawlSummary, imagePaths), + }, + reusedExistingBusiness: false, + }; +} + +async function advanceOnboardingJob(job) { + if (!job || typeof job !== 'object') { + throw createHttpError(404, 'Onboarding job not found'); + } + + if (job.status === 'completed' || job.status === 'failed') { + return job; + } + + try { + let pagePlan = job?.pagePlan && typeof job.pagePlan === 'object' ? job.pagePlan : null; + if (!pagePlan) { + job.status = 'crawling'; + job.stage = 'crawling'; + await saveOnboardingJob(job); + + pagePlan = await buildBrandContextPlan(job.websiteUrl); + job.pagePlan = pagePlan; + job.progress = { + ...(job.progress || {}), + pagesProcessed: 1, + pagesDiscovered: 1 + + (pagePlan.aboutUrl ? 1 : 0) + + (Array.isArray(pagePlan.productUrls) ? pagePlan.productUrls.length : 0) + + (pagePlan.discoveryUrl ? 1 : 0), + imageCount: Array.isArray(pagePlan.homepage?.images) ? pagePlan.homepage.images.length : 0, + linkCount: Array.isArray(pagePlan.homepage?.links) ? pagePlan.homepage.links.length : 0, + }; + return saveOnboardingJob(job); + } + + let crawlSummary = job?.crawlSummary && typeof job.crawlSummary === 'object' ? job.crawlSummary : null; + if (!crawlSummary) { + job.status = 'summarizing'; + job.stage = 'summarizing'; + await saveOnboardingJob(job); + + const pageSet = await collectBrandContextPages(pagePlan); + crawlSummary = buildCrawlSummary(pageSet, job.websiteUrl); + job.crawlSummary = crawlSummary; + delete job.pagePlan; + job.progress = { + ...(job.progress || {}), + pagesProcessed: crawlSummary.pageCount || 0, + pagesDiscovered: crawlSummary.pageCount || 0, + representativePages: Array.isArray(crawlSummary.representativePages) ? crawlSummary.representativePages.length : 0, + imageCount: Array.isArray(crawlSummary.topImages) ? crawlSummary.topImages.length : 0, + linkCount: Array.isArray(crawlSummary.links) ? crawlSummary.links.length : 0, + }; + return saveOnboardingJob(job); + } + + let brandContext = job?.brandContext && typeof job.brandContext === 'object' ? job.brandContext : null; + if (!brandContext) { + job.status = 'parsing_brand'; + job.stage = 'parsing_brand'; + await saveOnboardingJob(job); + brandContext = await parseBrandContext(crawlSummary); + job.brandContext = brandContext; + job.status = 'finalizing_business'; + job.stage = 'finalizing_business'; + return saveOnboardingJob(job); + } + + job.status = 'finalizing_business'; + job.stage = 'finalizing_business'; + await saveOnboardingJob(job); + + const result = await finalizeBusinessFromCrawlJob(job, crawlSummary, brandContext); + job.status = 'completed'; + job.stage = 'completed'; + job.business = result.business; + job.error = null; + return saveOnboardingJob(job); + } catch (error) { + job.status = 'failed'; + job.stage = 'failed'; + job.error = { + message: error.message || 'Business onboarding failed', + }; + return saveOnboardingJob(job); + } +} + const LEGACY_DEFAULT_EVENT_SLUGS = new Set(['confirmed', 'pack', 'cancelled']); const EVENT_TEMPLATE_FALLBACKS = { bag_confirmed: ['confirmed'], @@ -818,7 +1062,7 @@ router.get('/', async (req, res) => { } }); -// POST /api/businesses — create new business from websiteUrl with optional applicationId +// POST /api/businesses — start async business onboarding from websiteUrl with optional applicationId router.post('/', async (req, res) => { try { const merchantId = getCompanyId(req); @@ -841,87 +1085,58 @@ router.post('/', async (req, res) => { { code: 'MISSING_WEBSITE_URL' } ); } - const businesses = await getIndex(merchantId); + const businesses = await getIndex(merchantId); if (applicationId && businesses.some((business) => normalizeScopeId(business.applicationId) === applicationId)) { return res.status(409).json({ error: 'A business is already configured for this applicationId' }); } - const businessId = uuidv4(); - const bizRoot = businessRoot(merchantId, businessId); - const imagesFolder = `${bizRoot}/images`; + const now = new Date().toISOString(); + const job = await saveOnboardingJob({ + jobId: uuidv4(), + companyId: merchantId, + applicationId, + websiteUrl, + status: 'queued', + stage: 'queued', + progress: { + pagesProcessed: 0, + pagesDiscovered: 0, + representativePages: 0, + imageCount: 0, + linkCount: 0, + }, + crawlSummary: null, + brandContext: null, + business: null, + error: null, + createdAt: now, + updatedAt: now, + }); - // 1. Scrape - const scrapedData = await scrape(websiteUrl); + res.status(202).json(buildJobResponse(job)); + } catch (err) { + console.error('Start business onboarding error:', err.message); + sendRouteError(res, err); + } +}); - // 2. Parse brand context - const brandContext = await parseBrandContext(scrapedData); - - // 3. Upload relevant images - const imagePaths = []; - for (let i = 0; i < Math.min((brandContext.relevantImageUrls || []).length, 5); i++) { - const url = await uploadImageFromUrl(brandContext.relevantImageUrls[i], imagesFolder, `image_${i + 1}`); - if (url) imagePaths.push(url); +// GET /api/businesses/jobs/:jobId +router.get('/jobs/:jobId', async (req, res) => { + try { + const companyId = getCompanyId(req); + if (!companyId) { + throw createHttpError(400, 'companyId is required'); } - // 4. Build and upload context.json - let domain = ''; - try { domain = new URL(websiteUrl).hostname; } catch { } + const job = await loadOnboardingJobWithRetry(companyId, req.params.jobId); + if (!job) { + throw createHttpError(404, 'Onboarding job not found'); + } - const contextJson = { - businessId, - merchantId, - companyId: merchantId, - applicationId, - domain, - brandName: brandContext.brandName || 'Unknown Brand', - tone: brandContext.tone || 'professional', - taglines: brandContext.taglines || [], - colors: brandContext.colors || [], - relevantImagePaths: imagePaths, - createdAt: new Date().toISOString(), - updatedAt: new Date().toISOString(), - }; - await uploadJSON(bizRoot, 'context', contextJson); - - const scrapeArtifacts = { - cdnUrls: normalizeUrlList(imagePaths), - links: Array.isArray(scrapedData.links) ? scrapedData.links : [], - json: scrapedData?.json && typeof scrapedData.json === 'object' - ? scrapedData.json - : { - markdown: scrapedData.markdown || '', - links: Array.isArray(scrapedData.links) ? scrapedData.links : [], - metadata: scrapedData.metadata || {}, - images: Array.isArray(scrapedData.images) ? scrapedData.images : [], - }, - }; - - // 5. Init events.json - await uploadJSON(bizRoot, 'events', { events: DEFAULT_EVENTS }); - - // 6. Update index.json - const previewSummary = getBusinessPreviewSummary(contextJson); - businesses.push({ - businessId, - companyId: merchantId, - applicationId, - brandName: contextJson.brandName, - domain: contextJson.domain, - previewTagline: previewSummary.previewTagline, - previewImagePath: previewSummary.previewImagePath, - relevantImagePaths: normalizeUrlList(contextJson.relevantImagePaths), - createdAt: contextJson.createdAt, - updatedAt: contextJson.updatedAt, - }); - await saveIndex(merchantId, businesses); - - res.json({ - ...contextJson, - scrapeArtifacts, - }); + const updatedJob = await advanceOnboardingJob(job); + res.json(buildJobResponse(updatedJob)); } catch (err) { - console.error('Create business error:', err.message); sendRouteError(res, err); } }); @@ -930,9 +1145,23 @@ router.post('/', async (req, res) => { router.get('/:businessId', async (req, res) => { try { const { businessId } = req.params; - const context = await fetchJSON(businessRoot(getCompanyId(req), businessId), 'context'); + const merchantId = getCompanyId(req); + const root = businessRoot(merchantId, businessId); + const [context, crawlSummary] = await Promise.all([ + fetchJSON(root, 'context'), + fetchJSON(root, 'crawl_summary').catch(() => null), + ]); + if (!context) return res.status(404).json({ error: 'Business not found' }); - res.json(context); + + if (!crawlSummary) { + return res.json(context); + } + + res.json({ + ...context, + scrapeArtifacts: buildScrapeArtifacts(crawlSummary, context.relevantImagePaths), + }); } catch (err) { res.status(500).json({ error: err.message }); } diff --git a/server/services/crawlSummary.js b/server/services/crawlSummary.js new file mode 100644 index 0000000..5f69dd6 --- /dev/null +++ b/server/services/crawlSummary.js @@ -0,0 +1,225 @@ +function normalizeText(value) { + return typeof value === 'string' ? value.trim() : ''; +} + +function normalizeList(value) { + return Array.isArray(value) ? value : []; +} + +function uniqueStrings(values) { + const seen = new Set(); + return normalizeList(values) + .map((value) => normalizeText(value)) + .filter((value) => { + if (!value || seen.has(value)) return false; + seen.add(value); + return true; + }); +} + +function isHexColor(value) { + return /^#(?:[0-9a-f]{3}|[0-9a-f]{6}|[0-9a-f]{8})$/i.test(normalizeText(value)); +} + +function toColorEntry(name, value) { + const hex = normalizeText(value); + if (!isHexColor(hex)) return null; + + return { + name: normalizeText(name) || 'color', + hex: hex.toUpperCase(), + }; +} + +function extractHostname(url) { + try { + return new URL(url).hostname.replace(/^www\./i, '').toLowerCase(); + } catch { + return ''; + } +} + +function excerptText(page) { + const summary = normalizeText(page?.summary); + if (summary) return summary.slice(0, 800); + + return normalizeText(page?.markdown) + .replace(/\n{3,}/g, '\n\n') + .slice(0, 1600); +} + +function normalizeLinkItem(link) { + if (typeof link === 'string') { + const href = normalizeText(link); + return href ? { href, label: href } : null; + } + + if (!link || typeof link !== 'object') return null; + + const href = normalizeText(link.href || link.url || link.link); + if (!href) return null; + + return { + href, + label: normalizeText(link.text || link.title || link.label) || href, + }; +} + +function dedupeLinks(links) { + const seen = new Set(); + return normalizeList(links) + .map(normalizeLinkItem) + .filter((link) => { + if (!link || seen.has(link.href)) return false; + seen.add(link.href); + return true; + }); +} + +function summarizePage(page, pageType) { + const metadata = page?.metadata && typeof page.metadata === 'object' ? page.metadata : {}; + + return { + url: normalizeText(page?.url), + type: pageType, + title: normalizeText(metadata.title || metadata.ogTitle), + description: normalizeText(metadata.description || metadata.ogDescription), + excerpt: excerptText(page), + linkCount: normalizeList(page?.links).length, + imageCount: uniqueStrings(page?.images).length, + }; +} + +function buildRepresentativeTextBlocks(homepage, aboutPage, productPages) { + return [homepage, aboutPage, ...productPages] + .filter(Boolean) + .map((page) => ({ + url: page.url, + title: page.title, + pageType: page.type, + text: page.excerpt, + })); +} + +function flattenBranding(homepage) { + const branding = homepage?.branding && typeof homepage.branding === 'object' ? homepage.branding : {}; + const colorEntries = []; + const logos = []; + const brandNames = []; + + const colorSource = branding.colors || branding.colorPalette || branding.palette; + if (Array.isArray(colorSource)) { + colorSource.forEach((color, index) => { + if (typeof color === 'string') { + const entry = toColorEntry(`color_${index + 1}`, color); + if (entry) colorEntries.push(entry); + return; + } + + if (color && typeof color === 'object') { + const entry = toColorEntry( + color.name || color.label || color.role || `color_${index + 1}`, + color.hex || color.value || color.color + ); + if (entry) colorEntries.push(entry); + } + }); + } else if (colorSource && typeof colorSource === 'object') { + Object.entries(colorSource).forEach(([name, value]) => { + const entry = toColorEntry(name, value); + if (entry) colorEntries.push(entry); + }); + } + + normalizeList(branding.logos || branding.logoUrls || branding.logo_urls).forEach((logo) => { + if (typeof logo === 'string') { + logos.push(logo); + } else if (logo && typeof logo === 'object') { + logos.push(logo.url || logo.src || ''); + } + }); + + const brandName = normalizeText(branding.brandName || branding.brand_name || branding.name); + if (brandName) brandNames.push(brandName); + + return { + colors: uniqueStrings(colorEntries.map((entry) => entry.hex)), + labeledColors: colorEntries.filter((entry, index, values) => ( + values.findIndex((candidate) => candidate.name === entry.name && candidate.hex === entry.hex) === index + )), + logos: uniqueStrings(logos), + brandNames: uniqueStrings(brandNames), + }; +} + +function buildSummaryText(startUrl, homepage, aboutPage, productPages) { + const blocks = [`Site: ${startUrl}`]; + + [homepage, aboutPage, ...productPages].filter(Boolean).forEach((page, index) => { + blocks.push([ + `Page ${index + 1}: ${page.title || page.url}`, + `Type: ${page.type}`, + page.description ? `Description: ${page.description}` : '', + page.excerpt ? `Excerpt: ${page.excerpt}` : '', + ].filter(Boolean).join('\n')); + }); + + return blocks.join('\n\n').slice(0, 24000); +} + +function buildCrawlSummary(data = {}, startUrlOverride = '') { + const startUrl = normalizeText(startUrlOverride || data.startUrl); + const homepageRaw = data.homepage || null; + const aboutRaw = data.aboutPage || null; + const productRawPages = normalizeList(data.productPages); + const domain = extractHostname(startUrl || homepageRaw?.url || ''); + + const homepage = homepageRaw ? summarizePage(homepageRaw, 'home') : null; + const aboutPage = aboutRaw ? summarizePage(aboutRaw, 'about') : null; + const productPages = productRawPages.map((page) => summarizePage(page, 'product')); + const representativePages = [homepage, aboutPage, ...productPages].filter(Boolean); + const representativeTextBlocks = buildRepresentativeTextBlocks(homepage, aboutPage, productPages); + + const homepageLinks = dedupeLinks(data?.links?.homepage || homepageRaw?.links || []); + const discoveryLinks = dedupeLinks(data?.links?.discovery || []); + const links = dedupeLinks([...homepageLinks, ...discoveryLinks]); + const topImages = uniqueStrings([ + ...normalizeList(homepageRaw?.images), + ...normalizeList(aboutRaw?.images), + ...productRawPages.flatMap((page) => normalizeList(page?.images)), + ]).slice(0, 60); + const branding = flattenBranding(homepageRaw); + + return { + startUrl, + domain, + pageCount: representativePages.length, + siteStats: { + totalPages: representativePages.length, + totalLinks: links.length, + totalImages: topImages.length, + aboutPages: aboutPage ? 1 : 0, + productPages: productPages.length, + }, + homepage, + aboutPage, + contactPage: null, + policyPages: [], + productPages, + representativePages, + representativeTextBlocks, + keyPages: { + about: aboutPage ? [aboutPage] : [], + products: productPages, + }, + navigation: homepageLinks.slice(0, 30), + links, + socialLinks: links.filter((link) => /instagram|facebook|x\.com|twitter|linkedin|youtube|pinterest/i.test(link.href)), + topImages, + screenshots: [], + branding, + summaryText: buildSummaryText(startUrl, homepage, aboutPage, productPages), + }; +} + +module.exports = { buildCrawlSummary }; diff --git a/server/services/firecrawl.js b/server/services/firecrawl.js index fa1d624..e60ab22 100644 --- a/server/services/firecrawl.js +++ b/server/services/firecrawl.js @@ -1,44 +1,273 @@ const axios = require('axios'); -/** - * Scrape a website using Firecrawl. - * Returns normalized fields plus the raw response payload for downstream UI/debug use. - */ -async function scrape(url) { +const FIRECRAWL_BASE_URL = 'https://api.firecrawl.dev/v2'; +const DEFAULT_PRODUCT_PAGE_LIMIT = 5; +const HOMEPAGE_FORMATS = ['markdown', 'links', 'images', 'branding']; +const DISCOVERY_PAGE_FORMATS = ['markdown', 'links', 'images']; +const CONTENT_PAGE_FORMATS = ['markdown', 'images']; + +function getApiKey() { const apiKey = process.env.FIRECRAWL_API_KEY; if (!apiKey) throw new Error('FIRECRAWL_API_KEY not set'); + return apiKey; +} + +function getClient() { + return axios.create({ + baseURL: FIRECRAWL_BASE_URL, + timeout: 60000, + headers: { + Authorization: `Bearer ${getApiKey()}`, + 'Content-Type': 'application/json', + }, + validateStatus: () => true, + }); +} + +function normalizeDataEnvelope(data) { + if (data && typeof data === 'object' && data.data && typeof data.data === 'object') { + return data.data; + } + return data && typeof data === 'object' ? data : {}; +} + +function normalizeText(value) { + return typeof value === 'string' ? value.trim() : ''; +} + +function normalizeUrl(rawUrl, baseUrl = '') { + const value = normalizeText(rawUrl); + if (!value) return ''; try { - const response = await axios.post( - 'https://api.firecrawl.dev/v1/scrape', - { url, formats: ['markdown', 'links'] }, - { - headers: { - Authorization: `Bearer ${apiKey}`, - 'Content-Type': 'application/json', - }, - timeout: 30000, - } - ); - - const data = response.data?.data || response.data; - const markdownLen = typeof data?.markdown === 'string' ? data.markdown.length : 0; - const linksCount = Array.isArray(data?.links) ? data.links.length : 0; - - console.log( - `[Firecrawl] scrape success | status=${response.status} | markdownLen=${markdownLen} | links=${linksCount}` - ); - - return { - markdown: typeof data?.markdown === 'string' ? data.markdown : '', - links: Array.isArray(data?.links) ? data.links : [], - metadata: data?.metadata && typeof data.metadata === 'object' ? data.metadata : {}, - images: Array.isArray(data?.images) ? data.images : [], - json: data && typeof data === 'object' ? data : {}, - }; - } catch (err) { - throw err; + const url = baseUrl ? new URL(value, baseUrl) : new URL(value); + url.hash = ''; + url.search = ''; + return url.toString().replace(/\/$/, ''); + } catch { + return ''; } } -module.exports = { scrape }; +function normalizePageResult(page = {}, pageType = '') { + const metadata = page?.metadata && typeof page.metadata === 'object' ? page.metadata : {}; + + return { + url: normalizeUrl(page.url), + pageType: normalizeText(pageType || page.pageType), + markdown: typeof page.markdown === 'string' ? page.markdown : '', + summary: typeof page.summary === 'string' ? page.summary : '', + metadata, + links: Array.isArray(page.links) ? page.links : [], + images: Array.isArray(page.images) ? page.images : [], + branding: page?.branding && typeof page.branding === 'object' ? page.branding : {}, + }; +} + +function normalizeLinkItem(link, baseUrl = '') { + if (typeof link === 'string') { + const href = normalizeUrl(link, baseUrl); + return href ? { href, label: href } : null; + } + + if (!link || typeof link !== 'object') return null; + + const href = normalizeUrl(link.href || link.url || link.link, baseUrl); + if (!href) return null; + + return { + href, + label: normalizeText(link.text || link.title || link.label) || href, + }; +} + +function dedupeLinkItems(links, baseUrl = '') { + const seen = new Set(); + return (Array.isArray(links) ? links : []) + .map((link) => normalizeLinkItem(link, baseUrl)) + .filter((link) => { + if (!link || seen.has(link.href)) return false; + seen.add(link.href); + return true; + }); +} + +function getHostname(url) { + try { + return new URL(url).hostname; + } catch { + return ''; + } +} + +function getPathname(url) { + try { + return new URL(url).pathname.toLowerCase(); + } catch { + return ''; + } +} + +function isSameDomain(url, startUrl) { + return getHostname(url) === getHostname(startUrl); +} + +function isUtilityLink(link) { + const value = `${normalizeText(link?.href)} ${normalizeText(link?.label)}`.toLowerCase(); + return /(login|sign[- ]?in|sign[- ]?up|account|cart|checkout|wishlist|track|tracking|privacy|refund|return|shipping|terms|policy|policies|contact|support|help|faq|blog|blogs|journal|careers?|gift card|stores?)/i.test(value); +} + +function isAboutLink(link) { + const value = `${normalizeText(link?.href)} ${normalizeText(link?.label)}`.toLowerCase(); + return /(about|about-us|our story|our-story|story|brand story|who we are|who-we-are)/i.test(value); +} + +function isDiscoveryLink(link) { + const value = `${normalizeText(link?.href)} ${normalizeText(link?.label)}`.toLowerCase(); + return /(shop|products?|collections?|catalog|storefront|category|categories|new arrivals|best sellers|featured)/i.test(value); +} + +function scoreProductCandidate(link) { + if (!link || isUtilityLink(link) || isAboutLink(link) || isDiscoveryLink(link)) return -100; + + const href = normalizeText(link.href).toLowerCase(); + const label = normalizeText(link.label); + const pathname = getPathname(href); + const segments = pathname.split('/').filter(Boolean); + let score = 0; + + if (/(^|\/)(product|products|p|item|items|buy)(\/|$)/i.test(pathname)) score += 12; + if (/-\d{4,}$/.test(pathname) || /\d{4,}/.test(pathname)) score += 5; + if (pathname.includes('/collections/') || pathname.includes('/category/')) score -= 8; + if (segments.length >= 2) score += 3; + if (label.length >= 8 && label.length <= 120) score += 3; + if (/buy|shop now|view product|details/i.test(label)) score += 4; + if (href.split('-').length >= 4) score += 2; + + return score; +} + +function getSameDomainLinks(links, startUrl) { + return dedupeLinkItems(links, startUrl).filter((link) => isSameDomain(link.href, startUrl)); +} + +function selectAboutUrl(links, startUrl) { + return getSameDomainLinks(links, startUrl).find(isAboutLink)?.href || ''; +} + +function selectDiscoveryUrl(links, startUrl) { + return getSameDomainLinks(links, startUrl) + .filter((link) => isDiscoveryLink(link) && !isUtilityLink(link)) + .map((link) => link.href)[0] || ''; +} + +function selectProductUrls(links, startUrl, limit = DEFAULT_PRODUCT_PAGE_LIMIT) { + return getSameDomainLinks(links, startUrl) + .map((link) => ({ ...link, score: scoreProductCandidate(link) })) + .filter((link) => link.score > 0) + .sort((left, right) => right.score - left.score) + .slice(0, limit) + .map((link) => link.href); +} + +async function scrapePage(url, { formats, onlyMainContent = true, pageType = '' } = {}) { + const client = getClient(); + const response = await client.post('/scrape', { + url, + formats, + onlyMainContent, + }); + + if (response.status < 200 || response.status >= 300) { + throw new Error(`Firecrawl scrape failed for ${url} with status ${response.status}: ${JSON.stringify(response.data)}`); + } + + return normalizePageResult(normalizeDataEnvelope(response.data), pageType); +} + +async function buildBrandContextPlan(startUrl) { + const normalizedStartUrl = normalizeUrl(startUrl); + const homepage = await scrapePage(normalizedStartUrl, { + formats: HOMEPAGE_FORMATS, + onlyMainContent: false, + pageType: 'home', + }); + + const homepageLinks = getSameDomainLinks(homepage.links, normalizedStartUrl); + const aboutUrl = selectAboutUrl(homepageLinks, normalizedStartUrl); + const directProductUrls = selectProductUrls(homepageLinks, normalizedStartUrl); + const discoveryUrl = directProductUrls.length === 0 + ? selectDiscoveryUrl(homepageLinks, normalizedStartUrl) + : ''; + + return { + startUrl: normalizedStartUrl, + homepage, + aboutUrl, + discoveryUrl, + productUrls: directProductUrls, + }; +} + +async function collectBrandContextPages(plan) { + const startUrl = normalizeUrl(plan?.startUrl); + if (!startUrl || !plan?.homepage) { + throw new Error('Brand context plan is missing a homepage scrape'); + } + + let discoveryPage = null; + let productUrls = Array.isArray(plan.productUrls) ? plan.productUrls.slice(0, DEFAULT_PRODUCT_PAGE_LIMIT) : []; + + if (productUrls.length === 0 && normalizeText(plan.discoveryUrl)) { + discoveryPage = await scrapePage(plan.discoveryUrl, { + formats: DISCOVERY_PAGE_FORMATS, + onlyMainContent: true, + pageType: 'discovery', + }); + productUrls = selectProductUrls(discoveryPage.links, startUrl); + } + + const aboutPromise = normalizeText(plan.aboutUrl) + ? scrapePage(plan.aboutUrl, { + formats: CONTENT_PAGE_FORMATS, + onlyMainContent: true, + pageType: 'about', + }) + : Promise.resolve(null); + + const productPromises = productUrls + .slice(0, DEFAULT_PRODUCT_PAGE_LIMIT) + .map((productUrl) => scrapePage(productUrl, { + formats: CONTENT_PAGE_FORMATS, + onlyMainContent: true, + pageType: 'product', + }).catch(() => null)); + + const [aboutPage, productResults] = await Promise.all([ + aboutPromise, + Promise.all(productPromises), + ]); + + const productPages = productResults.filter(Boolean); + const items = [ + plan.homepage, + aboutPage, + discoveryPage, + ...productPages, + ].filter(Boolean); + + return { + startUrl, + homepage: plan.homepage, + aboutPage, + discoveryPage, + productPages, + items, + }; +} + +module.exports = { + scrapePage, + buildBrandContextPlan, + collectBrandContextPages, +}; diff --git a/server/services/openai2.js b/server/services/openai2.js index 032d597..ff4b4e7 100644 --- a/server/services/openai2.js +++ b/server/services/openai2.js @@ -50,18 +50,47 @@ async function postWorkflow(url, payload) { } async function parseBrandContext(scrapedData = {}) { - const markdown = String(scrapedData.markdown || '').slice(0, 8000); - const links = Array.isArray(scrapedData.links) ? scrapedData.links.slice(0, 200) : []; + const representativePages = Array.isArray(scrapedData.representativePages) + ? scrapedData.representativePages.slice(0, 20) + : []; + const representativeTextBlocks = Array.isArray(scrapedData.representativeTextBlocks) + ? scrapedData.representativeTextBlocks.slice(0, 20) + : []; + const productPages = Array.isArray(scrapedData.productPages) + ? scrapedData.productPages.slice(0, 5) + : []; + const contentDigest = representativeTextBlocks + .map((block) => { + const title = String(block?.title || '').trim(); + const pageType = String(block?.pageType || '').trim(); + const text = String(block?.text || '').trim(); + return [title, pageType, text].filter(Boolean).join(' | '); + }) + .filter(Boolean) + .join('\n\n') + .slice(0, 14000); const payload = { task: 'parse_brand_context', request_id: requestId('parse_brand_context'), - markdown, - links_json: JSON.stringify(links), - metadata_json: JSON.stringify(scrapedData.metadata || {}), - images_json: JSON.stringify(scrapedData.images || []), - raw_json_blob: JSON.stringify(scrapedData.json || {}), - output_schema_text: 'Return ONLY valid JSON object with exactly these keys: brandName (string), tone (one of: friendly, professional, formal, casual, energetic), taglines (array of strings, max 3), colors (array of hex color strings, or empty array), relevantImageUrls (array of 3-5 absolute image URLs for logo/hero/product images only; no icons/tracking/data URLs). No markdown, no prose, no extra keys.', + start_url: String(scrapedData.startUrl || ''), + domain: String(scrapedData.domain || ''), + site_stats_json: JSON.stringify(scrapedData.siteStats || {}), + homepage_json: JSON.stringify(scrapedData.homepage || {}), + about_page_json: JSON.stringify(scrapedData.aboutPage || {}), + product_pages_json: JSON.stringify(productPages), + contact_page_json: JSON.stringify(scrapedData.contactPage || {}), + representative_pages_json: JSON.stringify(representativePages), + representative_text_blocks_json: JSON.stringify(representativeTextBlocks), + navigation_json: JSON.stringify(scrapedData.navigation || []), + policy_pages_json: JSON.stringify(scrapedData.policyPages || []), + links_json: JSON.stringify(scrapedData.links || []), + top_images_json: JSON.stringify(scrapedData.topImages || []), + screenshots_json: JSON.stringify(scrapedData.screenshots || []), + branding_json: JSON.stringify(scrapedData.branding || {}), + crawl_summary_json: JSON.stringify(scrapedData || {}), + content_digest: contentDigest, + output_schema_text: 'You are given homepage, about-page, product-page, branding, and image evidence for a storefront. Use that evidence to infer brand identity and product language. Return ONLY valid JSON object with exactly these keys: brandName (string), tone (one of: friendly, professional, formal, casual, energetic), taglines (array of strings, max 3), colors (array of hex color strings, or empty array), relevantImageUrls (array of 3-5 absolute image URLs for logo/hero/product images only; no icons/tracking/data URLs), aboutSummary (string, 2-4 sentences, concise customer-facing brand summary that explains what the brand is about, what it sells, and its vibe; do not copy the About Us page verbatim). No markdown, no prose, no extra keys.', must_return_json_only: 'true', }; @@ -76,6 +105,7 @@ async function parseBrandContext(scrapedData = {}) { taglines: Array.isArray(output.taglines) ? output.taglines.slice(0, 3).map(String) : [], colors: Array.isArray(output.colors) ? output.colors.map(String) : [], relevantImageUrls: Array.isArray(output.relevantImageUrls) ? output.relevantImageUrls.map(String) : [], + aboutSummary: String(output.aboutSummary || '').trim(), }; }