by マティ » 2005年12月28日(水) 12:48
omasuさんごめんなさい、ファイルサイズと取得する処理を手抜きしていました。
前回投稿したプログラムでは、ファイルサイズが2GBまでしか処理出来ませんでした。
コード: 全て選択
'宣言の修正
' Dim nSize As Long
Dim nSize As Int64, nSizeLow As DWord '修正&追加
'ロジックの修正
' nSize = GetFileSize( hFileLoad, NULL )
nSizeLow = GetFileSize( hFileLoad, VarPtr(nSize)+4 )
nSize = nSize + nSizeLow
これで、実行可能になると思います。
クイックソート改4の改良版も載せておきます [ここをクリックすると内容が表示されます] [ここをクリックすると非表示にします]
コード: 全て選択
Declare Function timeGetTime Lib "winmm.dll" () As Long
'#Strict
'===============================================================================
' 0.01 標準関数のみで処理を作成
' 0.04 関数の展開を行う
' 0.05 ロジックの役割を変える
' 0.06 分割処理の書込み方法を変更
' 0.07 書込みバッファーの採用
'===============================================================================
'Const WriteLine=2048
Const WriteLine=4096
'Const xReSort=10240 '
'Const xReSort=20480 '256MB
'Const xReSort=51200 '
'Const xReSort=102400 '
'Const xReSort=153600 '
'Const xReSort=204800 '512MB
'Const xReSort=256000 '
Const xReSort=307200 '
Const LineLength = 158 '一行のバイト数
Const MarBuffer = xReSort * LineLength '4KB倍数にする事
Const WriteBurrer = WriteLine * LineLength '4KB倍数になる最小値
'Dim Dummy(72) As byte
Dim rBuffer(MarBuffer -1) As Byte 'Read Buffer's
Dim wBuffer(255 * WriteBurrer -1) As Byte 'WriteBuffer's '0.07
'LineLengthが255を超えると追加のワークを作成する必要がある
'Dim wBufferAdd(MarBuffer-(255 * WriteBurrer)) As Byte
Dim wBufAdr(255) As Long '0.07
Dim xFile (255) As Byte '0.06対象の有無
Dim xKeys (xReSort) As Byte '0.06対象キー
Dim MasterKey(LineLength) As Byte
Dim SortIndex(xReSort) As BytePtr
Dim SortKey(100)=[1,2,3,4,5,7,8,9,10,11,-1] As Char '列位置指定(-1で終了)
Dim StartTime As Long
Dim EndTime As Long
Dim xInFile As String
Dim xOutFile As String
Dim xWorkFile As String
'
Dim hFileLoad As HFILE
Dim hFileSave As HFILE
Dim nLoad As Long
Dim nSave As Long
' ---------------------------------------------
' 初期値を設定
' ---------------------------------------------
MkDir "c:\tmp"
xInFile = "c:\infile50000000.csv"
' xInFile = "d:\csv\infile8000000.csv"
' xInFile = "d:\csv\infile1000000.csv"
' xInFile = "d:\csv\infile1000.csv"
' xInFile = "d:\csv\infile100.csv"
' xInFile = "d:\csv\infile10.csv"
xOutFile = "OutPut.csv"
xWorkFile = "c:\tmp\sort"
' ---------------------------------------------
' ソート&ファイル書込み
' ---------------------------------------------
StartTime = timeGetTime()
' hFileSave = CreateFile(xOutFile,GENERIC_WRITE,0,ByVal NULL,CREATE_ALWAYS,FILE_ATTRIBUTE_NORMAL,NULL) 'Write
hFileSave = CreateFile(xOutFile,GENERIC_WRITE,0,ByVal NULL,CREATE_ALWAYS,FILE_ATTRIBUTE_NORMAL + FILE_FLAG_SEQUENTIAL_SCAN,NULL) 'Write
GigaSort( 0, xInFile )
CloseHandle( hFileSave )
EndTime = timeGetTime()
MsgBox 0, Str$( EndTime - StartTime )&"ms"
End
'===============================================================================
' ファイルを用いたソート方法
'===============================================================================
Function GigaSort(xNo As Integer, xFileName As String)As Integer
Dim i As Long
Dim x As Long
Dim z As Long '分割用アドレス
Dim r As BytePtr 'Read
Dim w As BytePtr 'Write
' Dim p As BytePtr 'pointer
'
Dim Index As Integer
Dim wFile As String 'ワークファイル名
' Dim nSize As Long
Dim nSize As Int64, nSizeLow As DWord
Dim nRead As Long
Dim hFile(255) As HFILE '書込みファイルハンドル
'-------------------------------------------------------
' ファイル処理(読込みファイルの作成作業)
'-------------------------------------------------------
If(xNo)Then wFile = xFileName Else wFile = xWorkFile
'
hFileLoad = CreateFile( xFileName, _
GENERIC_READ , _
0, _
ByVal NULL, _
OPEN_EXISTING, _
FILE_ATTRIBUTE_NORMAL + FILE_FLAG_SEQUENTIAL_SCAN, _
NULL) 'Read
' nSize = GetFileSize( hFileLoad, NULL )
nSizeLow = GetFileSize( hFileLoad, VarPtr(nSize)+4 )
nSize = nSize + nSizeLow
'-------------------------------------------------------
' ソート条件確認
'-------------------------------------------------------
Index=SortKey[xNo]-1
If(Index < 0)Then 'ソート不能、全て書き込む
SortOut( nSize )
If(xNo)Then DeleteFile(xFileName)
Exit Function
End If
If(nSize <= MarBuffer)Then 'メモリー内でソートが出来る場合
QuickSortSetup( xNo, nSize )
If(xNo)Then DeleteFile(xFileName)
Exit Function
End If
'-------------------------------------------------------
' 中間ファイルを用いて分割を行う処理
'-------------------------------------------------------
For i=0 To 255: wBufAdr=0: Next
'----------------------------------------
' 中間ファイル作成!
'----------------------------------------
While( nSize > 0 )
If( nSize > MarBuffer )Then nRead=MarBuffer Else nRead=nSize
ReadFile( hFileLoad, rBuffer As BytePtr, nRead, VarPtr(nLoad), ByVal 0)
'--------------------------------------------------
'インデックス作成
'--------------------------------------------------
r=rBuffer As BytePtr
For i=0 To ( nRead \ LineLength ) - 1
x=r[Index]: xKeys=x: xFile[x]=1: r=r+LineLength
Next
'--------------------------------------------------
'ファイル処理
'--------------------------------------------------
w = wBuffer As BytePtr
For x=0 To 255
If( xFile[x] )Then
If(hFile[x] = 0)Then
hFile[x] = CreateFile( wFile + Chr$(x), _
GENERIC_WRITE, 0, ByVal NULL, CREATE_ALWAYS, _
FILE_ATTRIBUTE_NORMAL + FILE_FLAG_SEQUENTIAL_SCAN, _
NULL) 'Write
End If
'
r=rBuffer As BytePtr
z=wBufAdr[x]
For i=0 To ( nRead \ LineLength ) - 1
If( xKeys = x )Then
memcpy((w+z), r, LineLength) '基準k
z = z + LineLength
If(z >= WriteBurrer)Then
WriteFile(hFile[x], w, z, VarPtr(nSave), ByVal 0)
z=0
End If
End If
r=r+LineLength
Next i
wBufAdr[x]=z
End If
w = w + WriteBurrer
Next
nSize = nSize - nRead
Wend
'----------------------------------------
' 中間ファイルは閉じると完成する!
'----------------------------------------
w = wBuffer As BytePtr
For x=0 To 255
If(hFile[x])Then
z=wBufAdr[x]
If(z>0)Then
WriteFile(hFile[x], w, z, VarPtr(nSave), ByVal 0)
End If
CloseHandle( hFile[x] )
End If
w = w + WriteBurrer
Next
CloseHandle( hFileLoad )
'----------------------------------------
' 再度ソートを行う処理を呼び出す
'----------------------------------------
If(xNo)Then DeleteFile(xFileName)
For i=0 To 255
If(hFile)Then GigaSort(xNo+1, wFile + Chr$(i)) '再帰処理
Next
End Function
'-----------------------------------------------------------
' ソート結果の書込み(最後まで絞り切れない場合に実行)
'-----------------------------------------------------------
Sub SortOut(nSize As DWord)
While( nSize > MarBuffer )
ReadFile( hFileLoad, rBuffer As BytePtr, MarBuffer, VarPtr(nLoad), ByVal 0)
WriteFile(hFileSave, rBuffer As BytePtr, MarBuffer, VarPtr(nSave), ByVal 0)
nSize = nSize - MarBuffer
Wend
ReadFile( hFileLoad, rBuffer As BytePtr, nSize, VarPtr(nLoad), ByVal 0)
WriteFile(hFileSave, rBuffer As BytePtr, nSize, VarPtr(nSave), ByVal 0)
CloseHandle( hFileLoad )
End Sub
'===============================================================================
' メモリーを用いたソート
'===============================================================================
Sub QuickSortSetup( xNo As Integer, nSize As DWord )
Dim i As Long
Dim p As BytePtr
Dim nCount As Long
'
ReadFile( hFileLoad, rBuffer As BytePtr, nSize, VarPtr(nLoad), ByVal 0)
CloseHandle( hFileLoad )
nCount = ( nSize \ LineLength ) - 1
' If(nCount<=0)Then Debug:exit function
' If(nCount>=0)Then
p=rBuffer As BytePtr
For i=0 To nCount
SortIndex = p: p=p+LineLength
Next
'
QuickSort(xNo, 0, nCount ) 'ソート処理
'
p=wBuffer As BytePtr
For i=0 To nCount
memcpy(p, SortIndex, LineLength) '基準k
p=p+LineLength
Next
WriteFile(hFileSave, wBuffer, nSize, VarPtr(nSave), ByVal 0)
' End If
End Sub
'----------------------------------------------------------
' クイックソート
'----------------------------------------------------------
Sub QuickSort( xNo As Integer, Kara As Long, Made As Long)
Dim i As Long
Dim j As Long
Dim w As BytePtr
Dim k As BytePtr
'
memcpy(MasterKey As BytePtr, SortIndex[(Kara+Made)\2], LineLength) '基準k
i=Kara : j=Made: k=MasterKey As BytePtr
Do
While KeyCheck( xNo, SortIndex , k ): i=i+1: Wend
While KeyCheck( xNo, k , SortIndex[j] ): j=j-1: Wend
If (i <= j) Then
w = SortIndex: SortIndex = SortIndex[j]: SortIndex[j] = w
i=i+1 : j=j-1
End If
Loop Until (i > j)
If (Kara < j) Then QuickSort(xNo, Kara, j)
If (i < Made) Then QuickSort(xNo, i, Made)
End Sub
'----------------------------------------------------------
' 比較処理( L<R なら -1 )
'----------------------------------------------------------
Function KeyCheck( xNo As Integer, L As BytePtr, R As BytePtr ) As Long
Dim x As Long
Do
x=SortKey[xNo]-1: If( x<0 )Then Exit Function
If( L[x] < R[x] )Then Exit Do
If( L[x] > R[x] )Then Exit Function
xNo = xNo + 1
Loop
KeyCheck = -1
End Function
'===============================================================================
' コード終了
'===============================================================================
PS.テストデータを作成するのに9時間もかかってしまいました。
[b]omasuさん[/b]ごめんなさい、ファイルサイズと取得する処理を手抜きしていました。
前回投稿したプログラムでは、ファイルサイズが2GBまでしか処理出来ませんでした。
[code]
'宣言の修正
' Dim nSize As Long
Dim nSize As Int64, nSizeLow As DWord '修正&追加
'ロジックの修正
' nSize = GetFileSize( hFileLoad, NULL )
nSizeLow = GetFileSize( hFileLoad, VarPtr(nSize)+4 )
nSize = nSize + nSizeLow
[/code]
これで、実行可能になると思います。
[hide=クイックソート改4の改良版も載せておきます]
[code]
Declare Function timeGetTime Lib "winmm.dll" () As Long
'#Strict
'===============================================================================
' 0.01 標準関数のみで処理を作成
' 0.04 関数の展開を行う
' 0.05 ロジックの役割を変える
' 0.06 分割処理の書込み方法を変更
' 0.07 書込みバッファーの採用
'===============================================================================
'Const WriteLine=2048
Const WriteLine=4096
'Const xReSort=10240 '
'Const xReSort=20480 '256MB
'Const xReSort=51200 '
'Const xReSort=102400 '
'Const xReSort=153600 '
'Const xReSort=204800 '512MB
'Const xReSort=256000 '
Const xReSort=307200 '
Const LineLength = 158 '一行のバイト数
Const MarBuffer = xReSort * LineLength '4KB倍数にする事
Const WriteBurrer = WriteLine * LineLength '4KB倍数になる最小値
'Dim Dummy(72) As byte
Dim rBuffer(MarBuffer -1) As Byte 'Read Buffer's
Dim wBuffer(255 * WriteBurrer -1) As Byte 'WriteBuffer's '0.07
'LineLengthが255を超えると追加のワークを作成する必要がある
'Dim wBufferAdd(MarBuffer-(255 * WriteBurrer)) As Byte
Dim wBufAdr(255) As Long '0.07
Dim xFile (255) As Byte '0.06対象の有無
Dim xKeys (xReSort) As Byte '0.06対象キー
Dim MasterKey(LineLength) As Byte
Dim SortIndex(xReSort) As BytePtr
Dim SortKey(100)=[1,2,3,4,5,7,8,9,10,11,-1] As Char '列位置指定(-1で終了)
Dim StartTime As Long
Dim EndTime As Long
Dim xInFile As String
Dim xOutFile As String
Dim xWorkFile As String
'
Dim hFileLoad As HFILE
Dim hFileSave As HFILE
Dim nLoad As Long
Dim nSave As Long
' ---------------------------------------------
' 初期値を設定
' ---------------------------------------------
MkDir "c:\tmp"
xInFile = "c:\infile50000000.csv"
' xInFile = "d:\csv\infile8000000.csv"
' xInFile = "d:\csv\infile1000000.csv"
' xInFile = "d:\csv\infile1000.csv"
' xInFile = "d:\csv\infile100.csv"
' xInFile = "d:\csv\infile10.csv"
xOutFile = "OutPut.csv"
xWorkFile = "c:\tmp\sort"
' ---------------------------------------------
' ソート&ファイル書込み
' ---------------------------------------------
StartTime = timeGetTime()
' hFileSave = CreateFile(xOutFile,GENERIC_WRITE,0,ByVal NULL,CREATE_ALWAYS,FILE_ATTRIBUTE_NORMAL,NULL) 'Write
hFileSave = CreateFile(xOutFile,GENERIC_WRITE,0,ByVal NULL,CREATE_ALWAYS,FILE_ATTRIBUTE_NORMAL + FILE_FLAG_SEQUENTIAL_SCAN,NULL) 'Write
GigaSort( 0, xInFile )
CloseHandle( hFileSave )
EndTime = timeGetTime()
MsgBox 0, Str$( EndTime - StartTime )&"ms"
End
'===============================================================================
' ファイルを用いたソート方法
'===============================================================================
Function GigaSort(xNo As Integer, xFileName As String)As Integer
Dim i As Long
Dim x As Long
Dim z As Long '分割用アドレス
Dim r As BytePtr 'Read
Dim w As BytePtr 'Write
' Dim p As BytePtr 'pointer
'
Dim Index As Integer
Dim wFile As String 'ワークファイル名
' Dim nSize As Long
Dim nSize As Int64, nSizeLow As DWord
Dim nRead As Long
Dim hFile(255) As HFILE '書込みファイルハンドル
'-------------------------------------------------------
' ファイル処理(読込みファイルの作成作業)
'-------------------------------------------------------
If(xNo)Then wFile = xFileName Else wFile = xWorkFile
'
hFileLoad = CreateFile( xFileName, _
GENERIC_READ , _
0, _
ByVal NULL, _
OPEN_EXISTING, _
FILE_ATTRIBUTE_NORMAL + FILE_FLAG_SEQUENTIAL_SCAN, _
NULL) 'Read
' nSize = GetFileSize( hFileLoad, NULL )
nSizeLow = GetFileSize( hFileLoad, VarPtr(nSize)+4 )
nSize = nSize + nSizeLow
'-------------------------------------------------------
' ソート条件確認
'-------------------------------------------------------
Index=SortKey[xNo]-1
If(Index < 0)Then 'ソート不能、全て書き込む
SortOut( nSize )
If(xNo)Then DeleteFile(xFileName)
Exit Function
End If
If(nSize <= MarBuffer)Then 'メモリー内でソートが出来る場合
QuickSortSetup( xNo, nSize )
If(xNo)Then DeleteFile(xFileName)
Exit Function
End If
'-------------------------------------------------------
' 中間ファイルを用いて分割を行う処理
'-------------------------------------------------------
For i=0 To 255: wBufAdr[i]=0: Next
'----------------------------------------
' 中間ファイル作成!
'----------------------------------------
While( nSize > 0 )
If( nSize > MarBuffer )Then nRead=MarBuffer Else nRead=nSize
ReadFile( hFileLoad, rBuffer As BytePtr, nRead, VarPtr(nLoad), ByVal 0)
'--------------------------------------------------
'インデックス作成
'--------------------------------------------------
r=rBuffer As BytePtr
For i=0 To ( nRead \ LineLength ) - 1
x=r[Index]: xKeys[i]=x: xFile[x]=1: r=r+LineLength
Next
'--------------------------------------------------
'ファイル処理
'--------------------------------------------------
w = wBuffer As BytePtr
For x=0 To 255
If( xFile[x] )Then
If(hFile[x] = 0)Then
hFile[x] = CreateFile( wFile + Chr$(x), _
GENERIC_WRITE, 0, ByVal NULL, CREATE_ALWAYS, _
FILE_ATTRIBUTE_NORMAL + FILE_FLAG_SEQUENTIAL_SCAN, _
NULL) 'Write
End If
'
r=rBuffer As BytePtr
z=wBufAdr[x]
For i=0 To ( nRead \ LineLength ) - 1
If( xKeys[i] = x )Then
memcpy((w+z), r, LineLength) '基準k
z = z + LineLength
If(z >= WriteBurrer)Then
WriteFile(hFile[x], w, z, VarPtr(nSave), ByVal 0)
z=0
End If
End If
r=r+LineLength
Next i
wBufAdr[x]=z
End If
w = w + WriteBurrer
Next
nSize = nSize - nRead
Wend
'----------------------------------------
' 中間ファイルは閉じると完成する!
'----------------------------------------
w = wBuffer As BytePtr
For x=0 To 255
If(hFile[x])Then
z=wBufAdr[x]
If(z>0)Then
WriteFile(hFile[x], w, z, VarPtr(nSave), ByVal 0)
End If
CloseHandle( hFile[x] )
End If
w = w + WriteBurrer
Next
CloseHandle( hFileLoad )
'----------------------------------------
' 再度ソートを行う処理を呼び出す
'----------------------------------------
If(xNo)Then DeleteFile(xFileName)
For i=0 To 255
If(hFile[i])Then GigaSort(xNo+1, wFile + Chr$(i)) '再帰処理
Next
End Function
'-----------------------------------------------------------
' ソート結果の書込み(最後まで絞り切れない場合に実行)
'-----------------------------------------------------------
Sub SortOut(nSize As DWord)
While( nSize > MarBuffer )
ReadFile( hFileLoad, rBuffer As BytePtr, MarBuffer, VarPtr(nLoad), ByVal 0)
WriteFile(hFileSave, rBuffer As BytePtr, MarBuffer, VarPtr(nSave), ByVal 0)
nSize = nSize - MarBuffer
Wend
ReadFile( hFileLoad, rBuffer As BytePtr, nSize, VarPtr(nLoad), ByVal 0)
WriteFile(hFileSave, rBuffer As BytePtr, nSize, VarPtr(nSave), ByVal 0)
CloseHandle( hFileLoad )
End Sub
'===============================================================================
' メモリーを用いたソート
'===============================================================================
Sub QuickSortSetup( xNo As Integer, nSize As DWord )
Dim i As Long
Dim p As BytePtr
Dim nCount As Long
'
ReadFile( hFileLoad, rBuffer As BytePtr, nSize, VarPtr(nLoad), ByVal 0)
CloseHandle( hFileLoad )
nCount = ( nSize \ LineLength ) - 1
' If(nCount<=0)Then Debug:exit function
' If(nCount>=0)Then
p=rBuffer As BytePtr
For i=0 To nCount
SortIndex[i] = p: p=p+LineLength
Next
'
QuickSort(xNo, 0, nCount ) 'ソート処理
'
p=wBuffer As BytePtr
For i=0 To nCount
memcpy(p, SortIndex[i], LineLength) '基準k
p=p+LineLength
Next
WriteFile(hFileSave, wBuffer, nSize, VarPtr(nSave), ByVal 0)
' End If
End Sub
'----------------------------------------------------------
' クイックソート
'----------------------------------------------------------
Sub QuickSort( xNo As Integer, Kara As Long, Made As Long)
Dim i As Long
Dim j As Long
Dim w As BytePtr
Dim k As BytePtr
'
memcpy(MasterKey As BytePtr, SortIndex[(Kara+Made)\2], LineLength) '基準k
i=Kara : j=Made: k=MasterKey As BytePtr
Do
While KeyCheck( xNo, SortIndex[i] , k ): i=i+1: Wend
While KeyCheck( xNo, k , SortIndex[j] ): j=j-1: Wend
If (i <= j) Then
w = SortIndex[i]: SortIndex[i] = SortIndex[j]: SortIndex[j] = w
i=i+1 : j=j-1
End If
Loop Until (i > j)
If (Kara < j) Then QuickSort(xNo, Kara, j)
If (i < Made) Then QuickSort(xNo, i, Made)
End Sub
'----------------------------------------------------------
' 比較処理( L<R なら -1 )
'----------------------------------------------------------
Function KeyCheck( xNo As Integer, L As BytePtr, R As BytePtr ) As Long
Dim x As Long
Do
x=SortKey[xNo]-1: If( x<0 )Then Exit Function
If( L[x] < R[x] )Then Exit Do
If( L[x] > R[x] )Then Exit Function
xNo = xNo + 1
Loop
KeyCheck = -1
End Function
'===============================================================================
' コード終了
'===============================================================================
[/code]
[/hide]
[color=red]PS.テストデータを作成するのに9時間もかかってしまいました。[/color]