45 lines
1.4 KiB
JavaScript
45 lines
1.4 KiB
JavaScript
const axios = require('axios');
|
|
|
|
/**
 * Scrape a single page through the Firecrawl v1 `/scrape` endpoint.
 *
 * Requests the `markdown` and `links` formats and returns normalized fields
 * plus the raw response payload for downstream UI/debug use. Every field in
 * the result has a stable type: anything missing or of an unexpected type in
 * the response is replaced with an empty default.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<{markdown: string, links: Array, metadata: Object, images: Array, json: Object}>}
 *   `markdown`, `links`, `metadata` and `images` are taken from the payload
 *   when they have the expected type; `json` is the payload itself.
 * @throws {Error} When the `FIRECRAWL_API_KEY` environment variable is not set.
 * @throws {TypeError} When `url` is missing or empty.
 * @throws Errors raised by `axios.post` (network failure, timeout, rejected
 *   status) propagate to the caller unchanged.
 */
async function scrape(url) {
  const apiKey = process.env.FIRECRAWL_API_KEY;
  if (!apiKey) throw new Error('FIRECRAWL_API_KEY not set');
  // Fail fast instead of spending a network round-trip on a request with no target.
  if (!url) throw new TypeError('scrape(url): url is required');

  // Deliberately no try/catch: a catch that only rethrows adds nothing, and
  // callers need the original axios error (with its `response`) intact.
  const response = await axios.post(
    'https://api.firecrawl.dev/v1/scrape',
    { url, formats: ['markdown', 'links'] },
    {
      headers: {
        Authorization: `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      timeout: 30000,
    }
  );

  // Prefer the nested `data` envelope; fall back to the whole body when the
  // envelope is absent or falsy.
  const data = response.data?.data || response.data;

  // Normalize once so the log line and the returned object cannot disagree.
  const markdown = typeof data?.markdown === 'string' ? data.markdown : '';
  const links = Array.isArray(data?.links) ? data.links : [];

  console.log(
    `[Firecrawl] scrape success | status=${response.status} | markdownLen=${markdown.length} | links=${links.length}`
  );

  return {
    markdown,
    links,
    metadata: data?.metadata && typeof data.metadata === 'object' ? data.metadata : {},
    images: Array.isArray(data?.images) ? data.images : [],
    json: data && typeof data === 'object' ? data : {},
  };
}
|
|
|
|
// Export `scrape` through CommonJS `module.exports`.
module.exports = { scrape };
|