/*
  This file is part of TALER
  (C) 2025 Taler Systems SA

  TALER is free software; you can redistribute it and/or modify it under the
  terms of the GNU Lesser General Public License as published by the Free Software
  Foundation; either version 3, or (at your option) any later version.

  TALER is distributed in the hope that it will be useful, but WITHOUT ANY
  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  A PARTICULAR PURPOSE.  See the GNU General Public License for more details.

  You should have received a copy of the GNU General Public License along with
  TALER; see the file COPYING.  If not, see <http://www.gnu.org/licenses/>
*/
/**
 * @file validators.c
 * @brief Input validators
 * @author Christian Grothoff
 */
#include "platform.h"
#include <gnunet/gnunet_util_lib.h>
#include <gnunet/gnunet_db_lib.h>
#include <taler/taler_json_lib.h>
#include "taler_merchant_util.h"
#include <regex.h>

/**
 * Check that @a image_data_url is either the empty string or has the
 * outward shape of a base64-encoded "data:image/..." URL.
 *
 * A non-empty value is accepted only if it starts with "data:image/"
 * (compared case-insensitively), contains the marker ";base64,"
 * somewhere, and passes TALER_url_valid_charset().  The base64 payload
 * itself is not decoded here.  Each rejection is reported via
 * GNUNET_break_op().
 *
 * @param image_data_url NUL-terminated string to check, must not be NULL
 * @return true if the string is empty or passes all three checks
 */
bool
TALER_MERCHANT_image_data_url_valid (const char *image_data_url)
{
  static const char scheme_prefix[] = "data:image/";
  const size_t prefix_len = sizeof (scheme_prefix) - 1;
  const char *encoding_marker;

  /* The empty string is accepted without further inspection. */
  if ('\0' == image_data_url[0])
    return true;

  /* Must begin with the "data:image/" scheme and media type. */
  if (0 != strncasecmp (scheme_prefix,
                        image_data_url,
                        prefix_len))
  {
    GNUNET_break_op (0);
    return false;
  }

  /* Must declare base64 encoding somewhere in the string. */
  encoding_marker = strstr (image_data_url,
                            ";base64,");
  if (NULL == encoding_marker)
  {
    GNUNET_break_op (0);
    return false;
  }

  /* Finally, the whole string must pass the URL character-set check. */
  if (TALER_url_valid_charset (image_data_url))
    return true;
  GNUNET_break_op (0);
  return false;
}


/**
 * Check whether @a email is a syntactically acceptable e-mail address.
 *
 * Only the syntax is checked; no attempt is made to find out whether
 * the mailbox or the domain exists.
 *
 * @param email NUL-terminated address to check, must not be NULL
 * @return true if @a email is non-empty, at most 254 bytes long,
 *         consists only of printable ASCII and matches the address
 *         grammar described in the function body; false otherwise
 */
bool
TALER_MERCHANT_email_valid (const char *email)
{
  regex_t regex;
  bool is_valid;

  if ('\0' == email[0])
    return false;

  /* Maximum email length per RFC 5321 */
  if (strlen (email) > 254)
    return false;

  /*
   * Reject control characters (including CR and LF), DEL and 8-bit
   * bytes up front.  The quoted-string branch of the pattern below
   * would otherwise accept any byte between the quotes, which lets
   * line breaks through and is outside what RFC 5321 permits in a
   * quoted local part.
   */
  for (const char *pos = email;
       '\0' != *pos;
       pos++)
  {
    const unsigned char byte = (unsigned char) *pos;

    if ( (byte < 0x20) ||
         (byte > 0x7e) )
      return false;
  }

  /*
   * Address grammar (POSIX extended regular expression, anchored at
   * both ends).  "atext" below abbreviates the bracket expression
   * [a-zA-Z0-9!#$%&'*+/=?^_`{|}~-].
   *
   * Local part (before @), one of:
   *   (atext+(\.)?)*atext+
   *     = dot-atom: runs of atext separated by single dots
   *       (no leading/trailing dot, no consecutive dots)
   *   "([^"\\]|\\.)*"
   *     = quoted-string: anything in double quotes with backslash
   *       escaping; limited to printable ASCII by the byte check above
   *
   * Domain part (after @), one of:
   *   label(\.label)+
   *     = at least two dot-separated labels, where label is
   *       [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
   *       (1 to 63 characters, no leading/trailing hyphen)
   *   \[o\.o\.o\.o\]
   *     = IPv4 literal, each octet o in the range 0-255 and written
   *       without leading zeros
   *   \[IPv6:[0-9a-fA-F:]*[0-9a-fA-F]\]
   *     = IPv6 literal: hex digits and colons, ending in a hex digit
   */
  const char *pattern =
    "^("
    /* Local part: dot-atom-text or quoted-string */
    "([a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(\\.)?)*[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+"
    "|"
    "\"([^\"\\\\]|\\\\.)*\""
    ")"
    "@"
    "("
    /* Domain: domain labels (with at least one dot) or IP literal */
    "([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)"
    "|"
    "\\[((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}"
    "([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))\\]"
    "|"
    "\\[IPv6:[0-9a-fA-F:]*[0-9a-fA-F]\\]"
    ")$";

  /* The pattern is a compile-time constant, so failing to compile it
     is a programming error rather than an input error. */
  GNUNET_assert (0 ==
                 regcomp (&regex,
                          pattern,
                          REG_EXTENDED | REG_NOSUB));
  is_valid = (0 ==
              regexec (&regex,
                       email,
                       0,
                       NULL,
                       0));
  regfree (&regex);
  return is_valid;
}


/**
 * Translate an ASCII letter into the digit that shares its key on a
 * telephone keypad (ABC=2, DEF=3, GHI=4, JKL=5, MNO=6, PQRS=7, TUV=8,
 * WXYZ=9).  Uses plain ASCII range checks so that the result does not
 * depend on the process locale.
 *
 * @param c character to translate
 * @return the digit '2'..'9' for a letter in A-Z or a-z, '\0' otherwise
 */
static char
keypad_digit (char c)
{
  /* One entry per letter A..Z, in alphabetical order. */
  static const char keys[] = "22233344455566677778889999";

  if ( (c >= 'a') &&
       (c <= 'z') )
    return keys[c - 'a'];
  if ( (c >= 'A') &&
       (c <= 'Z') )
    return keys[c - 'A'];
  return '\0';
}


/**
 * Validate the syntax of an international phone number and return it
 * in a compact "+digits" form.
 *
 * The input must start with '+' and a 1-3 digit country code.  It may
 * contain single '-', '.' or ' ' separators, one parenthesized area
 * code right after the country code, and an optional trailing
 * extension ("x", "ext" or "extension" followed by 1-6 digits).
 * Matching is case-insensitive.  Inputs longer than 30 bytes are
 * rejected.
 *
 * Normalization keeps the leading '+' and all digits, and drops
 * separators and parentheses.
 *  - If @a allow_letters is false, a trailing extension (keyword and
 *    its digits) is dropped, so that the result is just the number.
 *  - If @a allow_letters is true, every letter is replaced by its
 *    keypad digit.  NOTE: in this mode the grammar cannot tell an
 *    extension keyword from ordinary number letters, so an extension
 *    such as "x123" is converted like any other letters and digits.
 *
 * @param phone NUL-terminated number to check, must not be NULL
 * @param allow_letters true to accept letters A-Z (either case) as
 *        stand-ins for keypad digits
 * @return NULL if @a phone is not valid; otherwise the normalized
 *         number in a buffer obtained from GNUNET_malloc(), owned by
 *         the caller
 */
char *
TALER_MERCHANT_phone_validate_normalize (const char *phone,
                                         bool allow_letters)
{
  /*
   * Phone number regex pattern with +CC prefix requirement.
   *
   * Examples:
   *   +1-202-555-0173
   *   +33 1 42 68 53 00
   *   +44.20.7946.0958
   *   +1 (202) 555-0173
   *   +886 2 2345 6789
   *   +1-800-CALL-NOW (if allow_letters is true)
   *   +49-30-12345678x123
   *
   * Pattern breakdown (N is [0-9], or [0-9A-Z] if letters are allowed):
   *   ^\+[0-9]{1,3}             plus sign and 1-3 digit country code
   *   [-. ]?                    optional separator after country code
   *   (\([0-9]{1,4}\)[-. ]?)?   optional parenthesized area code
   *   N                         first character of the national number
   *   ([-. ]?N)*                further characters, each optionally
   *                             preceded by a single separator
   *   ([ ]?(x|ext|extension)[ ]?[0-9]{1,6})?
   *                             optional extension
   *   $                         end of string
   */
  const char *pattern_digits =
    "^\\+[0-9]{1,3}"                     /* Plus and country code (1-3 digits) */
    "[-. ]?"                             /* Optional single separator */
    "("                                  /* Optional area code group */
    "\\([0-9]{1,4}\\)"                   /* Area code in parens */
    "[-. ]?"                             /* Optional separator after parens */
    ")?"
    "[0-9]"                              /* Start national number with digit */
    "("                                  /* National number: digits and separators */
    "[-. ]?[0-9]"                        /* Optional separator followed by a digit */
    ")*"
    "([ ]?(x|ext|extension)[ ]?[0-9]{1,6})?" /* Optional extension */
    "$";
  const char *pattern_with_letters =
    "^\\+[0-9]{1,3}"                     /* Plus and country code (1-3 digits) */
    "[-. ]?"                             /* Optional single separator */
    "("                                  /* Optional area code group */
    "\\([0-9]{1,4}\\)"                   /* Area code in parens */
    "[-. ]?"                             /* Optional separator after parens */
    ")?"
    "[0-9A-Z]"                           /* Start national number with digit or letter */
    "("                                  /* National number: digits/letters and separators */
    "[-. ]?[0-9A-Z]"                     /* Optional separator followed by a digit or letter */
    ")*"
    "([ ]?(x|ext|extension)[ ]?[0-9]{1,6})?" /* Optional extension */
    "$";
  const char *pattern = allow_letters
    ? pattern_with_letters
    : pattern_digits;
  regex_t regex;
  int ret;
  char *normalized;
  char *out;

  if ('\0' == phone[0])
    return NULL;

  /* Maximum phone length (reasonable practical limit) */
  if (strlen (phone) > 30)
    return NULL;

  /* The patterns are compile-time constants, so failing to compile
     one is a programming error rather than an input error. */
  GNUNET_assert (0 ==
                 regcomp (&regex,
                          pattern,
                          REG_EXTENDED | REG_NOSUB | REG_ICASE));
  ret = regexec (&regex,
                 phone, 0,
                 NULL, 0);
  regfree (&regex);
  if (0 != ret)
    return NULL; /* invalid number */

  /*
   * Phone is valid - normalize it.  The output holds the '+' plus at
   * most one character per input character, and the input's own
   * leading '+' produces none, so strlen (phone) + 1 bytes suffice.
   */
  normalized = GNUNET_malloc (strlen (phone) + 1);
  out = normalized;
  *out++ = '+';  /* Start with plus sign */

  for (const char *in = phone;
       '\0' != *in;
       in++)
  {
    const char c = *in;
    char mapped;

    if ( (c >= '0') &&
         (c <= '9') )
    {
      /* Copy digit as-is */
      *out++ = c;
      continue;
    }
    mapped = keypad_digit (c);
    if ('\0' == mapped)
      continue; /* '+', separator or parenthesis: drop */
    if (! allow_letters)
    {
      /* In the digits-only grammar a letter can only be the start of
         the extension keyword: stop here so that neither the keyword
         nor the extension digits end up in the normalized number. */
      break;
    }
    /* Letter standing in for a keypad digit */
    *out++ = mapped;
  }
  *out = '\0'; /* redundant, but helps analyzers... */
  return normalized;
}
