import { extractReadableContent, fetchFirecrawlContent } from "../src/agents/tools/web-tools.js";

const DEFAULT_URLS = [
  "https://en.wikipedia.org/wiki/Web_scraping",
  "https://news.ycombinator.com/",
  "https://www.apple.com/iphone/",
  "https://www.nytimes.com/",
  "https://www.reddit.com/r/javascript/",
];

const urls = process.argv.slice(2);
const targets = urls.length > 0 ? urls : DEFAULT_URLS;
const apiKey = process.env.FIRECRAWL_API_KEY;
const baseUrl = process.env.FIRECRAWL_BASE_URL ?? "https://api.firecrawl.dev";

const userAgent =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36";
const timeoutMs = 30_000;

function truncate(value: string, max = 180): string {
  if (!value) {
    return "";
  }
  return value.length > max ? `${value.slice(0, max)}…` : value;
}

async function fetchHtml(url: string): Promise<{
  ok: boolean;
  status: number;
  contentType: string;
  finalUrl: string;
  body: string;
}> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(url, {
      method: "GET",
      headers: { Accept: "*/*", "User-Agent": userAgent },
      signal: controller.signal,
    });
    const contentType = res.headers.get("content-type") ?? "application/octet-stream";
    const body = await res.text();
    return {
      ok: res.ok,
      status: res.status,
      contentType,
      finalUrl: res.url || url,
      body,
    };
  } finally {
    clearTimeout(timer);
  }
}

async function run() {
  if (!apiKey) {
    console.log("FIRECRAWL_API_KEY not set. Firecrawl comparisons will be skipped.");
  }

  for (const url of targets) {
    console.log(`\n=== ${url}`);
    let localStatus = "skipped";
    let localTitle = "";
    let localText = "";
    let localError: string | undefined;

    try {
      const res = await fetchHtml(url);
      if (!res.ok) {
        localStatus = `http ${res.status}`;
      } else if (!res.contentType.includes("text/html")) {
        localStatus = `non-html (${res.contentType})`;
      } else {
        const readable = await extractReadableContent({
          html: res.body,
          url: res.finalUrl,
          extractMode: "markdown",
        });
        if (readable?.text) {
          localStatus = "readability";
          localTitle = readable.title ?? "";
          localText = readable.text;
        } else {
          localStatus = "readability-empty";
        }
      }
    } catch (error) {
      localStatus = "error";
      localError = error instanceof Error ? error.message : String(error);
    }

    console.log(`local: ${localStatus} len=${localText.length} title=${truncate(localTitle, 80)}`);
    if (localError) {
      console.log(`local error: ${localError}`);
    }
    if (localText) {
      console.log(`local sample: ${truncate(localText)}`);
    }

    if (apiKey) {
      try {
        const firecrawl = await fetchFirecrawlContent({
          url,
          extractMode: "markdown",
          apiKey,
          baseUrl,
          onlyMainContent: true,
          maxAgeMs: 172_800_000,
          proxy: "auto",
          storeInCache: true,
          timeoutSeconds: 60,
        });
        console.log(
          `firecrawl: ok len=${firecrawl.text.length} title=${truncate(
            firecrawl.title ?? "",
            80,
          )} status=${firecrawl.status ?? "n/a"}`,
        );
        if (firecrawl.warning) {
          console.log(`firecrawl warning: ${firecrawl.warning}`);
        }
        if (firecrawl.text) {
          console.log(`firecrawl sample: ${truncate(firecrawl.text)}`);
        }
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        console.log(`firecrawl: error ${message}`);
      }
    }
  }

  process.exit(0);
}

run().catch((error) => {
  console.error(error);
  process.exit(1);
});
