
// SPDX-License-Identifier: CC-BY-NC-SA-4.0
//
// Copyright (C) 2026 Bit by Bit Signal Processing LLC (https://bxbsp.com)
//
// This work is placed under the "Creative Commons Attribution
// NonCommercial ShareAlike 4.0 International" license, known
// by the shortened acronym "CC-BY-NC-SA-4.0".
//
// This work is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// A CC-BY-NC-SA-4.0 license allows you to use this work for
// noncommercial purposes so long as attribution is made to the
// original author.  Modified versions of this work may be distributed,
// but only under the same license.  For further details, see the
// Creative Commons License "CC-BY-NC-SA-4.0".
//
// You should have received a copy of the CC-BY-NC-SA-4.0 license
// along with this work. If not, see
// <https://creativecommons.org/licenses/by-nc-sa/4.0/>.
//

//

#include "unalingua.hh"


// Default constructor: the object starts out holding no text buffer.
unalingua::unalingua()
{
  this->text = nullptr;
}

// Conversion to a UTF-32 C string.  Yields the stored buffer when there
// is one, otherwise an empty string literal, so the result is never null.
unalingua::operator const char32_t*() const
{
  return (text != nullptr) ? static_cast<const char32_t*>(text) : U"";
}

// Destructor: frees the text buffer.  delete[] on a null pointer is a
// no-op, so no explicit null check is required.
unalingua::~unalingua()
{
  delete [] text;
}

const char32_t* unalingua::get_text()
{
  return text;
}

// Copy constructor.  The member pointer is cleared first, then the
// source object is handed to set_text() to populate this object.
unalingua::unalingua(const unalingua& text)
{
  this->text = nullptr;   // the parameter shadows the member, hence this->
  set_text(text);
}

// Constructs from a multilingua.  The member pointer is cleared first,
// then the source object is handed to set_text() to populate this object.
unalingua::unalingua(const multilingua& text)
{
  this->text = nullptr;   // the parameter shadows the member, hence this->
  set_text(text);
}

// Encodes a NUL-terminated UTF-32 string as a NUL-terminated UTF-8 byte
// string.  The result is allocated with new[]; the caller releases it
// with delete[].  Values above 0xFFFF are always written as four bytes
// using only their low 21 bits.
char* new_UTF8(const char32_t* t)
{
  // Count the input code points so the output can be sized up front.
  int count = 0;
  for(const char32_t* scan = t; *scan; ++scan)
    ++count;

  // No code point needs more than four bytes; one more for the terminator.
  char* bytes = new char[count*4+1];
  char* dst = bytes;

  for(const char32_t* src = t; *src; ++src)
    {
      const char32_t cp = *src;

      if(cp <= 0x7F)
        {
          // Single byte: 0xxxxxxx
          *dst++ = static_cast<char>(cp);
        }
      else if(cp <= 0x7FF)
        {
          // Two bytes: 110xxxxx 10xxxxxx
          *dst++ = static_cast<char>(0xC0 + ((cp>>6)&0x1F));
          *dst++ = static_cast<char>(0x80 + (cp&0x3F));
        }
      else if(cp <= 0xFFFF)
        {
          // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
          *dst++ = static_cast<char>(0xE0 + ((cp>>12)&0x0F));
          *dst++ = static_cast<char>(0x80 + ((cp>>6)&0x3F));
          *dst++ = static_cast<char>(0x80 + (cp&0x3F));
        }
      else
        {
          // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
          *dst++ = static_cast<char>(0xF0 + ((cp>>18)&0x07));
          *dst++ = static_cast<char>(0x80 + ((cp>>12)&0x3F));
          *dst++ = static_cast<char>(0x80 + ((cp>>6)&0x3F));
          *dst++ = static_cast<char>(0x80 + (cp&0x3F));
        }
    }

  *dst = 0;
  return bytes;
}


// Decodes a NUL-terminated UTF-8 byte string into a newly allocated,
// NUL-terminated UTF-32 string.  The result is allocated with new[]; the
// caller releases it with delete[].  `text` must not be null.
//
// Error policy: when a lead byte is invalid (a stray continuation byte
// 0x80-0xBF, or 0xF8 and above) or is not followed by the required number
// of continuation bytes (10xxxxxx), that single lead byte is dropped and
// decoding resumes at the very next byte.  Overlong encodings and
// surrogate values are not rejected.
char32_t* new_UTF32(const char* text)
{
  // Every code point consumes at least one input byte, so the byte length
  // is an upper bound on the number of code points produced.
  int len = 0;
  while(text[len])
    len++;

  char32_t* u32 = new char32_t[len+1];

  int j = 0;
  int i = 0;
  while(text[i])
    {
      const unsigned char lead = text[i];

      // Plain ASCII: one byte, one code point.
      if(lead < 0x80)
        {
          u32[j++] = lead;
          i++;
          continue;
        }

      // Classify the lead byte: how many continuation bytes follow and
      // which payload bits the lead byte itself contributes.
      int extra;
      char32_t cp;
      if((lead & 0xE0) == 0xC0)
        {
          extra = 1;
          cp = lead & 0x1F;
        }
      else if((lead & 0xF0) == 0xE0)
        {
          extra = 2;
          cp = lead & 0x0F;
        }
      else if((lead & 0xF8) == 0xF0)
        {
          extra = 3;
          cp = lead & 0x07;
        }
      else
        {
          // Stray continuation byte or invalid lead byte: drop it.
          i++;
          continue;
        }

      // Validate and accumulate the continuation bytes.  The terminating
      // NUL fails the 10xxxxxx test, so the scan stops at the terminator
      // and never reads beyond the end of the string.
      bool ok = true;
      for(int k = 1; k <= extra; k++)
        {
          const unsigned char c = text[i+k];
          if((c & 0xC0) != 0x80)
            {
              ok = false;
              break;
            }
          cp = (cp << 6) | (c & 0x3F);
        }

      if(ok)
        {
          u32[j++] = cp;
          i += extra + 1;
        }
      else
        {
          // This is an error for UTF8.  Ignore the lead byte and continue.
          i++;
        }
    }

  u32[j] = 0;

  return u32;
}

// Copies a NUL-terminated wide-character string into a newly allocated,
// NUL-terminated char32_t array, converting each element individually.
// The result is allocated with new[]; the caller releases it with delete[].
char32_t* new_UTF32(const wchar_t* txt)
{
  // Measure the input (terminator excluded).
  int count = 0;
  while(txt[count] != 0)
    ++count;

  char32_t* copy = new char32_t[count+1];

  // Element-wise widening copy, then terminate.
  for(int k = 0; k < count; ++k)
    copy[k] = static_cast<char32_t>(txt[k]);
  copy[count] = 0;

  return copy;
}

// Decodes a NUL-terminated UTF-16 string into a newly allocated,
// NUL-terminated UTF-32 string.  The result is allocated with new[]; the
// caller releases it with delete[].
//
// A high surrogate (0xD800-0xDBFF) immediately followed by a low surrogate
// (0xDC00-0xDFFF) is combined into one code point in the range
// 0x10000-0x10FFFF.  Unpaired surrogates are copied through unchanged.
char32_t* new_UTF32(const char16_t* txt)
{
  int len;
  for(len=0; txt[len]; len++)
    ;

  // Each code unit yields at most one code point, so len+1 always suffices.
  char32_t* u32 = new char32_t[len+1];

  int out = 0;
  for(int in=0; in<len; in++)
    {
      const char32_t unit = txt[in];

      if(unit >= 0xD800 && unit <= 0xDBFF)
        {
          // in+1 <= len, so at worst this reads the terminating NUL,
          // which is not a low surrogate.
          const char32_t next = txt[in+1];
          if(next >= 0xDC00 && next <= 0xDFFF)
            {
              u32[out++] = 0x10000 + ((unit - 0xD800) << 10) + (next - 0xDC00);
              in++;   // the low surrogate has been consumed as well
              continue;
            }
        }

      u32[out++] = unit;
    }

  u32[out] = 0;

  return u32;
}

// Duplicates a NUL-terminated UTF-32 string into a freshly allocated
// buffer.  The result is allocated with new[]; the caller releases it
// with delete[].
char32_t* new_UTF32(const char32_t* txt)
{
  // Find the terminator to learn how many elements to copy.
  const char32_t* end = txt;
  while(*end)
    ++end;
  const int count = static_cast<int>(end - txt);

  char32_t* dup = new char32_t[count+1];

  // Copy the payload, then write the terminator.
  char32_t* dst = dup;
  for(const char32_t* src = txt; src != end; ++src)
    *dst++ = *src;
  *dst = 0;

  return dup;
}
