Jose, over in https://www.planetsquires.com/protect/forum/index.php?topic=4073.msg30869#msg30869 you mentioned that your "tally was faster"... and AfxStrTally is very fast...
But I'm hoping you can tell me how to make "AfxStrParse" faster (I'm on a Core i7 2nd Gen, SSD drives).
I have a file with 10,000 lines, but only 186K in size.
I open the file, figure out the line-endings, and "parse" to another array.
My problem is, the AfxStrParse is very slow... so I've been nosing around in AfxStr.inc and wfxMainMenu.inc trying to "get smarter"... but I see that sometimes you use the prefix "**" on variables and sometimes not. I have started feeling less smart.
(updated code further down)
Use CTextStream Class instead.
https://github.com/JoseRoca/WinFBX/blob/master/docs/File%20Management/CTextStream%20Class.md#OpenForInputA
Jose, I *really* do appreciate your classes, but this "pTextStream.ReadLine" doesn't work as expected for me.
IF pTextStream.OpenForInputW(wszFileName) = S_OK THEN
cbsBuf = ""
DO UNTIL pTextStream.EOS
cbsBuf = pTextStream.ReadLine
LOOP
pTextStream.Close
Else
jMessage "Error loading file '" & wszFileName & "' into memory (JLF 540)."
goto exitFunction
EndIf
It loaded the entire 10000 line text file in one pTextStream.ReadLine (I can remove the DO LOOP and it still reads the entire file in one call).
And, yes, my 10000 line text file (according to Notepad++) is CR LF...
But is the file ANSI or Unicode? If it is ANSI, you should use OpenForInputA instead of OpenForInputW.
See example: https://github.com/JoseRoca/WinFBX/blob/master/docs/File%20Management/CTextStream%20Class.md#OpenForInputA
Ah, ok... the OpenForInputA worked.
I also changed from wszBuf to cbsBuf and that sped it up a lot.
SHOCK: I thought I would have to do the work of "detecting line-endings" but apparently OpenForInputA does that for me... NICE! THX!
Now... I think I asked you about 10 years ago how to tell a "unicode" text file from a "non-unicode" text file.
UPDATE: Ok, I answered my own question -- indeed, I can check the first two bytes for 254 and 255 (UTF-16 BOM Big Endian) or 255 and 254 (UTF-16 BOM Little Endian); either one requires OpenForInputW, and everything else uses OpenForInputA.
Thanks!!!
And, here's the source, for the curious... thanks again, Jose, for all the years. I've included some "CR line ending" code because the OpenForInput functions only handle LF and CRLF correctly, and I also detect whether it's a UNICODE or ANSI file.
#Define UNICODE
#Define WIN_INCLUDEALL
#Include Once "windows.bi" ' includes crt.bi for qsort
#Include Once "fbio.bi" ' for IsRedirected
#Include Once "crt\stdio.bi" ' for C functions getc, putc, _putws
#Include Once "crt\wchar.bi" ' for C def WEOF
#Include Once "win\urlmon.bi"
#Include Once "win\wininet.bi"
#Include Once "win\winsock2.bi"
#Include Once "Afx\CWindow.inc"
#Include Once "Afx\CFileSys.inc"
#Include Once "Afx\AfxTime.inc"
#Include Once "Afx\AfxFile.inc"
' Line-ending markers used when normalising and splitting text read from a file.
Const As String CR   = Chr(13)      ' carriage return
Const As String LF   = Chr(10)      ' line feed
Const As String CRLF = Chr(13, 10)  ' carriage return followed by line feed
' =============================================================================
' -----------------------------------------------------------------------------
' jSplitString
'   Splits source at every occurrence of delimiter and APPENDS the pieces to
'   destination (elements already in the array are kept).
'
'   source      - text to split
'   destination - dynamic array that receives one element per field
'   delimiter   - separator text; if it never occurs (or is empty) the whole
'                 of source becomes a single field
'
'   An empty field before a delimiter still occupies an (empty) array slot;
'   an empty field after the LAST delimiter is not added.
'
'   Returns the total number of elements in destination after the call.
' -----------------------------------------------------------------------------
Function jSplitString (ByVal source As CWSTR, destination(Any) As CWSTR, ByVal delimiter As CWSTR) As LONG
   Dim As Integer sourceLen = Len(source)
   Dim As Integer delimLen  = Len(delimiter)
   Dim As Integer startPos  = 1    ' 1-based index where the current field begins
   Dim As Integer position  = InStr(startPos, source, delimiter)

   ' Walk through source with a moving start index instead of repeatedly
   ' copying the remaining tail, so each character is copied only once.
   Do While position
      ReDim Preserve destination(UBound(destination)+1)
      ' When the delimiter sits right at startPos the field is empty and the
      ' freshly added slot is simply left at its default (empty) value.
      If position > startPos Then
         destination(UBound(destination)) = Mid(source, startPos, position - startPos)
      End If
      startPos = position + delimLen
      position = InStr(startPos, source, delimiter)
   Loop

   ' Whatever follows the last delimiter is the final field (skipped if empty).
   If startPos <= sourceLen Then
      ReDim Preserve destination(UBound(destination)+1)
      destination(UBound(destination)) = Mid(source, startPos)
   End If

   Return UBound(destination)+1
End Function
' =============================================================================
' -----------------------------------------------------------------------------
' jLoadFile
'   Reads a text file line by line and appends every line to wszArray.
'
'   wszFileName - path of the file to load
'   wszArray    - dynamic array that receives the lines (appended, not cleared)
'
'   The first two bytes of the file are inspected for a UTF-16 byte order mark
'   (FE FF or FF FE). If one is present the file is opened with
'   CTextStream.OpenForInputW, otherwise with CTextStream.OpenForInputA.
'   NOTE(review): confirm that OpenForInputW decodes big-endian (FE FF) files
'   correctly; this function only selects the call, it does no byte swapping.
'
'   If ReadLine hands back text that still contains a CR, that text is
'   normalised (CRLF and LF become CR) and split on CR so that each piece
'   becomes its own line.
'
'   Returns the number of lines appended, or -1 if the file could not be
'   opened by CTextStream or a runtime error was trapped.
' -----------------------------------------------------------------------------
Function jLoadFile (ByVal wszFileName As CWSTR, wszArray() As CWSTR) As LONG
   Dim As Long x, fp, lNumLines = 0
   Dim As Long openResult
   Dim As Boolean isUtf16 = False
   Dim As ZString * 4 zTemp
   Dim As CBSTR cbsBuf = ""
   Dim As CWSTR wszLocalArray(Any)
   Dim As CTextStream pTextStream

   On Local Error Goto exitFunction

   ' --- Sniff the byte order mark to decide between the W and A readers -------
   ' A failure to open the file here is not reported; the CTextStream open
   ' below reports it instead.
   fp = FreeFile
   If Open(wszFileName For Binary Access Read As #fp) = 0 Then
      If Lof(fp) > 1 Then
         Get #fp, 1, zTemp
         ' 254,255 = UTF-16 big endian BOM; 255,254 = UTF-16 little endian BOM
         If (zTemp[0] = 254 AndAlso zTemp[1] = 255) OrElse (zTemp[0] = 255 AndAlso zTemp[1] = 254) Then
            isUtf16 = True
         End If
      End If
      Close #fp
   End If

   ' --- Open the stream with the reader that matches the encoding -------------
   'https://github.com/JoseRoca/WinFBX/blob/master/docs/File%20Management/CTextStream%20Class.md#OpenForInputW
   'https://github.com/JoseRoca/WinFBX/blob/master/docs/File%20Management/CTextStream%20Class.md#OpenForInputA
   If isUtf16 Then
      openResult = pTextStream.OpenForInputW(wszFileName)
   Else
      openResult = pTextStream.OpenForInputA(wszFileName)
   End If

   If openResult <> S_OK Then
      If isUtf16 Then
         ? "Error loading file '" & wszFileName & "' into memory (JLF UNICODE)."
      Else
         ? "Error loading file '" & wszFileName & "' into memory (JLF ANSI)."
      End If
      Goto exitFunction
   End If

   ' --- Read every line; the loop is the same for both encodings --------------
   Do Until pTextStream.EOS
      cbsBuf = pTextStream.ReadLine
      If InStr(cbsBuf, CR) Then
         ' ReadLine returned text with an embedded CR: treat CR as the line
         ' separator and break the text up ourselves.
         Erase wszLocalArray
         ' drop trailing line-enders so they do not produce extra fields
         If Right(cbsBuf,2) = CRLF Then cbsBuf = Left(cbsBuf,Len(cbsBuf)-2)
         If Right(cbsBuf,1) = CR Then cbsBuf = Left(cbsBuf,Len(cbsBuf)-1)
         If Right(cbsBuf,1) = LF Then cbsBuf = Left(cbsBuf,Len(cbsBuf)-1)
         ' collapse any remaining CRLF / LF to CR so one delimiter is enough
         If AfxStrTally(cbsBuf,CRLF) > 0 Then cbsBuf = AfxStrReplace(cbsBuf,CRLF,CR)
         If AfxStrTally(cbsBuf,LF) > 0 Then cbsBuf = AfxStrReplace(cbsBuf,LF,CR)
         ' split on CR and append each piece as its own line
         jSplitString(cbsBuf, wszLocalArray(), CR)
         For x = 0 To UBound(wszLocalArray)
            AfxCwstrArrayAppend( wszArray(), wszLocalArray(x) )
            lNumLines += 1
         Next x
      Else
         ' ordinary line: append as-is
         AfxCwstrArrayAppend( wszArray(), cbsBuf )
         lNumLines += 1
      End If
   Loop
   pTextStream.Close

   Return lNumLines ' return # of lines found

exitFunction:
   Return -1 ' return -1 if error
End Function
' =============================================================================
' -----------------------------------------------------------------------------
' WinMain
'   Program entry: loads the file named by the first command-line argument
'   with jLoadFile and prints how many lines were found.
'
'   The four parameters are not used by this function; the file name is taken
'   from Command(1).
'
'   Returns 0 on success, or -1 when no existing file was given or when
'   jLoadFile reports a failure.
' -----------------------------------------------------------------------------
Function WinMain( _
   ByVal hInstance As HINSTANCE, _
   ByVal hPrevInstance As HINSTANCE, _
   ByVal szCmdLine As ZString Ptr, _
   ByVal nCmdShow As Long _
   ) As Long
   Dim As String fileName = Command(1)   ' read the argument once
   Dim As LONG lineCount
   ReDim As CWSTR wszLines(Any)

   ' AndAlso short-circuits and tests each operand for non-zero on its own,
   ' so the result does not depend on how the two values combine bitwise.
   If Len(fileName) > 0 AndAlso AfxFileExists(fileName) Then
      lineCount = jLoadFile(fileName, wszLines())
      ' jLoadFile returns -1 on failure; do not report that as a line count.
      If lineCount < 0 Then
         ? : ? "Failed to load file '" & fileName & "'"
         Return -1
      End If
      ? : ? "Found '" & lineCount & "' lines in file '" & fileName & "'"
   Else
      ? : ? "Usage: jLoadFile.exe [filename]"
      Return -1
   End If

   Return 0
End Function
' =============================================================================
' Module-level entry point: run WinMain and end the program, using WinMain's
' return value as the exit code.
End WinMain(GetModuleHandle(Null), Null, Command(), SW_NORMAL)
' =============================================================================