import z from "zod";

/**
 * Pulls every Markdown image embed (`![alt](url "optional title")`) out of a
 * document.
 *
 * @param markdown - Markdown source to scan.
 * @returns `imageUrls` holds the URL of each embed in document order (embeds
 * whose URL is empty are not listed); `textWithoutImages` is the input with
 * every embed removed and the result trimmed.
 */
export function extractAllImageUrlsAndRemoveEmbeddings(markdown: string): {
  imageUrls: string[];
  textWithoutImages: string;
} {
  const imageRegex = /!\[.*?\]\((.*?)(?:\s+"[^"]+")?\)/g;

  // Capture group 1 is the URL; drop embeds where it is empty.
  const imageUrls = Array.from(
    markdown.matchAll(imageRegex),
    (embed) => embed[1],
  ).filter((url): url is string => Boolean(url));

  return {
    imageUrls,
    textWithoutImages: markdown.replace(imageRegex, "").trim(),
  };
}

/**
 * Rewrites Markdown links of the form `[label](target)` as plain text
 * `label (target)`.
 *
 * `mailto:` targets are emitted verbatim; every other target is passed through
 * `encodeURI` first.
 *
 * @param text - Text that may contain Markdown links.
 * @returns The text with each Markdown link replaced by its plain-text form.
 */
export function replaceMarkdownUrlsWithRawUrls(text: string): string {
  const markdownLink = /\[([^\]]+)\]\(([^)]+)\)/g;

  return text.replace(
    markdownLink,
    (_whole: string, label: string, target: string) => {
      const rendered = target.startsWith("mailto:")
        ? target
        : encodeURI(target);
      return `${label} (${rendered})`;
    },
  );
}

/**
 * Reports whether `text` passes zod's string email validation.
 *
 * @param text - Candidate email address.
 * @returns `true` when validation succeeds, otherwise `false`.
 */
export function isEmailAddr({ text }: { text: string }) {
  // safeParse reports failure via `success` instead of throwing.
  return z.string().email().safeParse(text).success;
}

/**
 * Reports whether `text` can be read as a number.
 *
 * The check relies on zod's `z.coerce.number()`, which coerces the input with
 * `Number(...)` before validating it.
 *
 * @param text - Candidate numeric string.
 * @returns `true` when the text is non-blank and coerces to a number that zod
 * accepts, otherwise `false`.
 */
export function isNumeric({ text }: { text: string }) {
  // Number("") and Number("   ") both evaluate to 0, so coercion alone would
  // report blank input as numeric; reject it up front.
  if (typeof text === "string" && text.trim() === "") {
    return false;
  }

  return z.coerce.number().safeParse(text).success;
}

/**
 * Converts arbitrary text into a lower-case, dash-separated slug made only of
 * Unicode letters and numbers.
 *
 * @param text - Text to convert.
 * @param maxLength - Maximum slug length; defaults to no limit.
 * @returns The slug, truncated to `maxLength` with any trailing dashes left by
 * the truncation removed.
 */
export function slugify(text: string, maxLength = Infinity) {
  // Decompose accented characters so the combining marks can be dropped,
  // leaving the base letter behind.
  const unaccented = text
    .trim()
    .toLowerCase()
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "");

  // Drop anything that is not a letter or number from both ends.
  const core = unaccented.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, "");

  // Turn each remaining separator run into a dash, then tidy up the dashes.
  const dashed = core
    .replace(/[^\p{L}\p{N}]+/gu, "-")
    .replace(/-+/g, "-")
    .replace(/^-+|-+$/g, "");

  // Cutting at maxLength may leave a dash at the end; remove it.
  const truncated = dashed.slice(0, maxLength);
  return truncated.replace(/-+$/, "");
}

/**
 * Generates a random string of exactly `size` decimal digits whose first digit
 * is never `0`.
 *
 * Digits are produced one at a time, so the result stays correct for sizes
 * beyond what a floating-point number can represent exactly (building the
 * value as a single number loses digits above roughly 16 and switches to
 * exponent notation for very large values).
 *
 * NOTE(review): this uses `Math.random`, which is not cryptographically
 * secure. If callers use the result as a secret (e.g. a verification code),
 * confirm whether a secure source is required.
 *
 * @param size - Number of digits to generate; must be a positive integer.
 * @returns A string of `size` digits with a non-zero leading digit.
 * @throws Error if `size` is not a positive integer.
 */
export function generateRandomNumericString(size: number) {
  if (!Number.isInteger(size) || size <= 0) {
    throw new Error("Size must be a positive integer.");
  }

  // Leading digit is drawn from 1-9 so the string never starts with 0.
  let digits = String(1 + Math.floor(Math.random() * 9));

  // Remaining digits are drawn from 0-9.
  for (let i = 1; i < size; i++) {
    digits += String(Math.floor(Math.random() * 10));
  }

  return digits;
}

/**
 * Reports whether the first character of `string` is one of the letters
 * a, e, i, o or u, ignoring case.
 *
 * @param string - Text to inspect.
 * @returns `true` if the first character is a vowel; `false` otherwise,
 * including for the empty string.
 */
export function startsWithVowel(string: string) {
  const vowels = new Set(["a", "e", "i", "o", "u"]);
  const firstChar = string.charAt(0).toLowerCase();

  return string.length > 0 && vowels.has(firstChar);
}

/**
 * Strips a common leading indentation from a multi-line string.
 *
 * The indentation width is the count of leading space/tab characters on the
 * first line, or on the second line when the string starts with a newline.
 * The string is trimmed, then up to that many leading space/tab characters are
 * removed from every line.
 *
 * @param str - Text to dedent.
 * @returns The trimmed text with the detected indentation removed.
 */
export function dedent(str: string) {
  const [firstLine, secondLine] = str.split("\n");

  // A leading newline means the real content starts on the second line.
  const referenceLine = firstLine === "" ? secondLine : firstLine;
  const indent =
    referenceLine === undefined
      ? 0
      : (/^[ \t]*/.exec(referenceLine)?.[0].length ?? 0);

  const trimmed = str.trim();
  if (indent === 0) {
    return trimmed;
  }

  // Remove at most `indent` leading whitespace characters from each line.
  const leadingIndent = new RegExp(`^[ \\t]{0,${indent}}`, "gm");
  return trimmed.replace(leadingIndent, "");
}

/**
 * Returns the first email address found in `str`.
 *
 * The address may be bare or wrapped in angle brackets and/or double quotes
 * (e.g. `Jane <jane@example.com>` or `"<jane@example.com>"`); only the address
 * itself is returned, without any surrounding punctuation.
 *
 * @param str - Text to search.
 * @returns The first email address, or `undefined` when none is present.
 */
export function extractEmailAddress(str: string) {
  // Match only the address itself so wrapping quotes/brackets are neither
  // required for a match nor included in the result.
  const match = /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/.exec(str);

  return match?.[0];
}

/**
 * Normalizes a list of URLs by dropping a leading `www.` (together with the
 * `http://` or `https://` scheme in front of it, if any) and filtering the
 * query string.
 *
 * @param urls - URLs to clean.
 * @param publicQueryStringKeys - Query parameters to keep. When `undefined`,
 * the whole query string is removed.
 * @returns The cleaned URLs, in the same order as `urls`.
 */
export function cleanUrls({
  urls,
  publicQueryStringKeys,
}: {
  urls: string[];
  publicQueryStringKeys: string[] | undefined;
}) {
  return urls.map((url) => {
    // Remove https://www., http://www., or just www. if present
    const withoutWww = url.replace(/^(https?:\/\/)?www\./i, "");
    const [baseUrl, queryString] = withoutWww.split("?");

    // Nothing to keep: either no allow-list was given or there is no query.
    if (!publicQueryStringKeys || !queryString) {
      return baseUrl;
    }

    const incoming = new URLSearchParams(queryString);
    const kept = new URLSearchParams();

    // Copy over only the allow-listed parameters, in allow-list order.
    publicQueryStringKeys.forEach((key) => {
      const value = incoming.get(key);
      if (value !== null) {
        kept.set(key, value);
      }
    });

    const keptQuery = kept.toString();
    if (!keptQuery) {
      return baseUrl;
    }
    return `${baseUrl}?${keptQuery}`;
  });
}

/**
 * Escapes the reserved characters of the Elasticsearch query-string syntax by
 * prefixing each occurrence with a backslash.
 *
 * All operators are escaped in a single pass over the input. Escaping them one
 * after another would re-escape the backslashes inserted by earlier passes
 * (turning `a+b` into `a\\+b`, which leaves the `+` unescaped).
 *
 * NOTE(review): the Elasticsearch documentation states that `<` and `>` cannot
 * be escaped and must be removed to avoid range queries. They are still
 * backslash-prefixed here, as before; confirm whether callers need them
 * stripped instead.
 *
 * @param str - Raw user text.
 * @returns The text with `&&`, `||` and every single-character operator
 * preceded by a backslash.
 */
export function escapeElasticsearchSpecialChars(str: string): string {
  // Split into single-char and multi-char special characters
  const multiCharSpecials = ["&&", "||"];
  const singleCharSpecials = [
    "+",
    "-",
    "=",
    ">",
    "<",
    "!",
    "(",
    ")",
    "{",
    "}",
    "[",
    "]",
    "^",
    '"',
    "~",
    "*",
    "?",
    ":",
    "\\",
    "/",
  ];

  // Multi-character operators come first in the alternation so they are
  // matched as a unit.
  const specials = new RegExp(
    [...multiCharSpecials, ...singleCharSpecials].map(escapeRegExp).join("|"),
    "g",
  );

  // "$&" re-inserts the matched operator after the added backslash.
  return str.replace(specials, "\\$&");
}

// Helper function to escape special regex characters
function escapeRegExp(string: string) {
  return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Lookup table from HTML named character references to their replacement
 * characters: the upper- and lower-case Greek alphabet plus a few
 * general-purpose entities (`&nbsp;`, `&amp;`, `&acute;`, `&uml;`).
 *
 * `decodeGreekText` iterates this table in insertion order and replaces every
 * occurrence of each key with its value.
 */
export const greekEntities: Record<string, string> = {
  // Upper-case Greek letters
  "&Alpha;": "Α",
  "&Beta;": "Β",
  "&Gamma;": "Γ",
  "&Delta;": "Δ",
  "&Epsilon;": "Ε",
  "&Zeta;": "Ζ",
  "&Eta;": "Η",
  "&Theta;": "Θ",
  "&Iota;": "Ι",
  "&Kappa;": "Κ",
  "&Lambda;": "Λ",
  "&Mu;": "Μ",
  "&Nu;": "Ν",
  "&Xi;": "Ξ",
  "&Omicron;": "Ο",
  "&Pi;": "Π",
  "&Rho;": "Ρ",
  "&Sigma;": "Σ",
  "&Tau;": "Τ",
  "&Upsilon;": "Υ",
  "&Phi;": "Φ",
  "&Chi;": "Χ",
  "&Psi;": "Ψ",
  "&Omega;": "Ω",
  // Lower-case Greek letters (including final sigma, `&sigmaf;`)
  "&alpha;": "α",
  "&beta;": "β",
  "&gamma;": "γ",
  "&delta;": "δ",
  "&epsilon;": "ε",
  "&zeta;": "ζ",
  "&eta;": "η",
  "&theta;": "θ",
  "&iota;": "ι",
  "&kappa;": "κ",
  "&lambda;": "λ",
  "&mu;": "μ",
  "&nu;": "ν",
  "&xi;": "ξ",
  "&omicron;": "ο",
  "&pi;": "π",
  "&rho;": "ρ",
  "&sigmaf;": "ς",
  "&sigma;": "σ",
  "&tau;": "τ",
  "&upsilon;": "υ",
  "&phi;": "φ",
  "&chi;": "χ",
  "&psi;": "ψ",
  "&omega;": "ω",
  // Whitespace, ampersand and standalone diacritic entities
  "&nbsp;": " ",
  "&amp;": "&",
  "&acute;": "´",
  "&uml;": "¨",
};

/**
 * Decodes the HTML entities listed in `greekEntities` and normalizes
 * whitespace.
 *
 * Steps, in order: `&amp;` becomes `&` (so a double-encoded entity such as
 * `&amp;alpha;` is decoded as well); each table entry is replaced by its
 * character; any remaining alphabetic named entity (`&name;`) is removed; runs
 * of whitespace collapse to one space and the result is trimmed.
 *
 * @param text - Text that may contain HTML entities.
 * @returns The decoded, whitespace-normalized text.
 */
export function decodeGreekText(text: string): string {
  // First decode the &amp; to &
  const ampersandsDecoded = text.replace(/&amp;/g, "&");

  // Then replace the Greek entities, one table entry at a time
  const entitiesDecoded = Object.entries(greekEntities).reduce(
    (acc, [entity, char]) =>
      acc.replace(new RegExp(escapeRegExp(entity), "g"), char),
    ampersandsDecoded,
  );

  return entitiesDecoded
    .replace(/&[a-zA-Z]+;/g, "") // Drop any entity the table did not cover
    .replace(/\s+/g, " ") // Collapse whitespace runs
    .trim();
}

/**
 * Splits text into trimmed chunks, none longer than `maxLength` characters
 * except possibly when the input itself is short enough to be returned whole.
 *
 * While the remaining text is longer than `maxLength`, its first `maxLength`
 * characters are inspected and the text is cut at, in order of preference:
 * the first blank line (`"\n\n"`), the first sentence end (`.`, `!` or `?`
 * followed by whitespace), or exactly `maxLength` characters. Because the
 * first boundary is used, chunks can be much shorter than `maxLength`.
 *
 * @param text - Text to split.
 * @param maxLength - Maximum chunk length; must be at least 1. Defaults to 1000.
 * @returns The non-empty chunks in order. Text that already fits is returned
 * as a single, untrimmed chunk; empty text yields an empty array.
 * @throws RangeError if `maxLength` is less than 1 (a cut of zero characters
 * could never make progress).
 */
export function splitTextIntoChunks(text: string, maxLength = 1000): string[] {
  if (maxLength < 1) {
    throw new RangeError("maxLength must be at least 1.");
  }

  const result: string[] = [];
  let remainingText = text;

  while (remainingText.length > maxLength) {
    const head = remainingText.slice(0, maxLength);

    // Try to find the first double newline
    let splitIndex = head.indexOf("\n\n");

    // If no double newline, find first sentence boundary
    if (splitIndex === -1) {
      const match = /[.!?]\s/.exec(head);
      // +1 keeps the punctuation mark with the sentence it ends.
      splitIndex = match ? match.index + 1 : maxLength;
    }

    // Text that starts with whitespace or a blank line yields an empty piece;
    // skip it instead of emitting an empty chunk.
    const chunk = remainingText.slice(0, splitIndex).trim();
    if (chunk.length > 0) {
      result.push(chunk);
    }
    remainingText = remainingText.slice(splitIndex).trim();
  }

  // Add the last remaining part
  if (remainingText.length > 0) {
    result.push(remainingText);
  }

  return result;
}
