{*------------------------------------------------------------------------------
  Definition of wide and ansi string types for cross-platform apps

  @Author Patrick Michael Kolla
  @Version 2006/06/28  Added documentation
  @Todo Do real UTF-16 -> UTF-8 conversions
-------------------------------------------------------------------------------}
// *****************************************************************************
// Copyright: © 2006 Safer Networking Limited. All rights reserved.
// File:      pkStrings.pas
// Compiler:  FreePascal 2.1.1
// Purpose:   Downloading files by HTTP using functions from wininet.dll
// Authors:   Patrick M. Kolla (pk)
// *****************************************************************************
// Changelog (new entries first):
// ---------------------------------------
// 2006-06-28  pk  10m  Added Visibility property
// *****************************************************************************

unit pkStrings;

interface


uses SysUtils, Windows, Classes;

type
   {$IFDEF WinCE}
   {$DEFINE Unicode}
   PStringsChar = PWideChar;   /// A null-terminated string, Ansi or Unicode based on Operating System
   StringsString = WideString; /// A standard string, Ansi or Unicode based on Operating System
   {$ELSE WinCE}
   {$UNDEF Unicode}
   PStringsChar = PAnsiChar;   /// A null-terminated string, Ansi or Unicode based on Operating System
   StringsString = AnsiString; /// A standard string, Ansi or Unicode based on Operating System
   {$ENDIF WinCE}


procedure SeparatedStringToStringList(sInput: string; cSeparator: char; slOutput: TStringList);
function StringListToSeparatedString(slInput: TStrings; cSeparator: char): string;
procedure ReadTwoByteFileToStringList(Filename: String; slFile: TStringList);
procedure StreamReadUTF16FileToStrings(Filename: String; slFile: TStrings);
function TryPossibleUnicodeStringToAnsiStringSimple(AText: string): string;
function StringIsHexString(AText: string): boolean;
function StringSeemsValidString(AText: string): boolean;

implementation

{*------------------------------------------------------------------------------
  Separate a string by cutting it at a given character

  @param sInput String to be separated
  @param cSeparator Character to identify separation
  @param slOutput List where strings should be added
  @see   StringListToSeparatedString
-------------------------------------------------------------------------------}
procedure SeparatedStringToStringList(sInput: string; cSeparator: char; slOutput: TStringList);
begin
   if not Assigned(slOutput)
    then slOutput := TStringList.Create;
   while Pos(cSeparator,sInput)>0 do begin
      slOutput.Add(Copy(sInput,1,Pos(cSeparator,sInput)-1));
      Delete(sInput,1,Pos(cSeparator,sInput));
   end;
   slOutput.Add(sInput);
end;

{*------------------------------------------------------------------------------
  Separate a string list with a given separator char without the quote
  trouble created by TStrings.DelimitedText

  @param slInput List of strings to be separated
  @param cSeparator Character to use for string separation
  @see   SeparatedStringToStringList
-------------------------------------------------------------------------------}
function StringListToSeparatedString(slInput: TStrings; cSeparator: char): string;
var iString: integer;
begin
   Result := '';
   for iString := 0 to Pred(slInput.Count)
    do Result := Result + slInput[iString] + cSeparator;
end;

{*------------------------------------------------------------------------------
  Very ugly way to read Unicode text files with BOM into pure ansi strings.

  @param Filename Name of file to read
  @param slFile Destination string list
-------------------------------------------------------------------------------}
procedure ReadTwoByteFileToStringList(Filename: String; slFile: TStringList);
const bufsize = 256;
var resText: String;
    myFile: file;
    buf: array[0..bufsize-1] of char;
    bytesRead: integer;
    i: integer;
begin
   if FileExists(Filename) then begin
      if not Assigned(slFile) then slFile := TStringList.Create;
      AssignFile(myFile,Filename);
      Reset(myFile,1);
      resText := '';
      try
         BlockRead(myFile,buf,2);
         repeat
            BlockRead(myFile, buf, bufSize, bytesRead);
            for i := 0 to (bytesRead div 2)-1 do begin
               resText := resText + buf[(i*2)];
            end;
         until bytesRead < bufSize;
      finally
         CloseFile(myFile);
      end;
      if (Pos(#13#10,resText)=0)
       and (Pos(#13,resText)>0)
        then resText := StringReplace(resText, #13, #13#10, [rfReplaceAll]);
      
      slFile.Text := resText;
   end;
end;

{*------------------------------------------------------------------------------
  Loads an UTF-16 file into a stringlist containing only ANSI characters.

  @param Filename Name of file to read
  @param slFile Destination string list
-------------------------------------------------------------------------------}
procedure StreamReadUTF16FileToStrings(Filename: String; slFile: TStrings);
const bufSize = 4096;
var fs: TFileStream;
    buf: array[0..bufsize-1] of char;
    iByte, iInc, readSize: integer;
    resText: AnsiString;
    bom: Word;
begin
   fs := TFileStream.Create(Filename, fmOpenRead or fmShareDenyWrite);
   fs.Seek(0, soFromBeginning);
   fs.Read(bom, 2);
   if bom=$FFFE
    then iInc := 1;
   readSize := bufSize;
   resText := '';
   // WriteLn('File size is: '+IntToStr(fs.Size));
   while fs.Position<fs.Size do begin
      readSize := fs.Size - fs.Position;
      if readSize>bufSize
       then readSize := bufSize;
      // WriteLn('File position is: '+IntToStr(fs.Position)+', with block to be read: '+IntToStr(readSize));
      fs.ReadBuffer(buf, readSize);
      // TODO : write UTF-8?
      for iByte := 0 to (readSize div 2)-1
       do resText := resText + buf[(iByte*2)+iInc];
   end;
   fs.Free;
   slFile.Text := resText;
end;

{*------------------------------------------------------------------------------
  Attempts to convert a string containing Unicode into a readable ANSI string.

  @param AText  Text to convert
  @return ANSI string.
-------------------------------------------------------------------------------}
function TryPossibleUnicodeStringToAnsiStringSimple(AText: string): string;
var i: integer;
begin
   Result := AText;
   if Length(AText)<2
    then Exit;
   if AText[1]=#0 then begin
      Result := '';
      for i := 0 to Pred(Length(AText) div 2)
       do Result := Result + AText[(i*2)+2];
   end else if AText[2]=#0 then begin
      Result := '';
      for i := 0 to Pred(Length(AText) div 2)
       do Result := Result + AText[(i*2)+1];
   end;
end;

{*------------------------------------------------------------------------------
  Checks whether the string only contains character valid for hex strings; aka
  containing only 0 to 9 and a/A to f/F.

  @param AText String to check
-------------------------------------------------------------------------------}
function StringIsHexString(AText: string): boolean;
var i: integer;
begin
   Result := true;
   for i := 1 to Length(AText)
    do if not (AText[i] in ['0'..'9','a'..'f','A'..'F'])
     then begin
        Result := false;
        Exit;
     end;
end;

{*------------------------------------------------------------------------------
  Checks whether the string seems to be plain text, or contains special
  characters. Check is based on character ordinal being larger or smaller #32.

  @param AText String to check
-------------------------------------------------------------------------------}
function StringSeemsValidString(AText: string): boolean;
var i: integer;
begin
   Result := true;
   for i := 1 to Length(AText)
    do if not (AText[i]<#32)
     then begin
        Result := false;
        Exit;
     end;
end;

end.
