import sanitize from 'sanitize-html';

/**
 * Extracts the raw text content of a JSON `<script>` element from an HTML
 * document.
 *
 * The element is located by a literal string search for
 * `<script id="{scriptId}" type="application/json">` in the sanitized markup,
 * so it is only found when the sanitizer serializes the opening tag in exactly
 * that form (`id` first, then `type`, no other attributes).
 *
 * @param html - The HTML document to search.
 * @param scriptId - The `id` attribute of the `<script>` element to extract.
 * @returns The text between the opening tag and the next `</script>`, as an
 *   unparsed string; an empty string when the opening tag is not present.
 */
export function extractJSONFromHTML(html: string, scriptId: string) {
  // Reduce the size of the string to be searched by stripping out all tags
  // except <script> tags (attributes are left untouched).
  const sanitizedHTML = sanitize(html, {
    allowedTags: ['script'],
    allowVulnerableTags: true,
    allowedAttributes: false,
    parseStyleAttributes: false,
  });

  // Locate the opening script tag in the sanitized HTML.
  const scriptTag = `<script id="${scriptId}" type="application/json">`;
  const tagIdx = sanitizedHTML.indexOf(scriptTag);
  if (tagIdx === -1) {
    // Without this guard the -1 would be folded into the start offset and an
    // arbitrary slice of unrelated markup would be returned.
    return '';
  }

  const startIdx = tagIdx + scriptTag.length;
  const endIdx = sanitizedHTML.indexOf('</script>', startIdx);
  if (endIdx === -1) {
    // No closing tag: return everything after the opening tag rather than
    // letting slice(startIdx, -1) silently drop the final character.
    return sanitizedHTML.slice(startIdx);
  }

  // Return--as a string--the JSON object embedded within the <script> tag.
  return sanitizedHTML.slice(startIdx, endIdx);
}

/**
 * Cleans up a JSON-like string and parses it.
 *
 * The input is first passed through `removeTrailingCommas`, then through
 * `convertBooleanStrings`, and the result is handed to `JSON.parse`.
 *
 * @param jsonString - The JSON-like text to clean and parse.
 * @returns The value produced by `JSON.parse`.
 * @throws SyntaxError if the cleaned text is still not valid JSON.
 */
export function sanitizeJSON(jsonString: string) {
  // Order matters only in that both clean-ups run before parsing.
  return JSON.parse(convertBooleanStrings(removeTrailingCommas(jsonString)));
}

/**
 * Removes trailing commas from a JSON-like string.
 *
 * A comma is dropped, together with any whitespace that follows it, when the
 * next non-whitespace character is `}` or `]`, or when nothing but whitespace
 * remains until the end of the input. Commas inside double-quoted strings are
 * never touched, so string values such as `"x,}"` are preserved verbatim.
 *
 * @param jsonString - The JSON-like text to clean.
 * @returns The text with trailing commas removed.
 */
export function removeTrailingCommas(jsonString: string) {
  const whitespace = /\s/;
  const length = jsonString.length;
  let result = '';
  let inString = false;
  let escaped = false;
  let i = 0;

  while (i < length) {
    const ch = jsonString.charAt(i);

    if (inString) {
      // Copy string contents untouched; a backslash escapes the next
      // character so that \" does not terminate the string.
      result += ch;
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      i += 1;
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === ',') {
      // Look past any whitespace to see what the comma is followed by.
      let next = i + 1;
      while (next < length && whitespace.test(jsonString.charAt(next))) {
        next += 1;
      }
      const follower = jsonString.charAt(next);
      if (next === length || follower === '}' || follower === ']') {
        // Trailing comma: skip it and the whitespace after it.
        i = next;
        continue;
      }
    }

    result += ch;
    i += 1;
  }

  return result;
}

/**
 * Turns quoted boolean values into bare JSON booleans.
 *
 * Every occurrence of a colon, optional whitespace, and `"true"` or `"false"`
 * (matched case-insensitively) is rewritten to `: true` or `: false`.
 *
 * @param jsonString - The JSON-like text to rewrite.
 * @returns The text with quoted boolean values unquoted and lower-cased.
 */
export function convertBooleanStrings(jsonString: string) {
  // One pass handles both literals; the captured word is lower-cased so that
  // e.g. "TRUE" becomes the JSON literal true.
  const quotedBoolean = /:\s*"(true|false)"/gi;
  return jsonString.replace(
    quotedBoolean,
    (_match: string, literal: string) => `: ${literal.toLowerCase()}`,
  );
}
