Last time, I was uncertain about string concatenation, so I did a test:
#!/usr/bin/env julia
# Chunk of text that every benchmark function appends once per loop iteration.
const kTestText = "abcdefghijklmnopqrstuvwxyz0123456789\n"
# Number of append iterations performed by every benchmark function.
const kLoops = 10000
"""
    stringString()

Build the result by rebinding a string to the interpolation of itself and
`kTestText`, `kLoops` times. Every iteration creates a brand-new string.

Returns the concatenated `String`.
"""
function stringString()
    acc = ""
    for _ in 1:kLoops
        # Interpolation produces a fresh string holding the old contents plus one chunk.
        acc = "$acc$kTestText"
    end
    return acc
end
"""
    bufferString()

Build the result by printing `kTestText` into an in-memory `IOBuffer`
`kLoops` times, then converting the buffer contents to a string.

Returns the concatenated `String`.
"""
function bufferString()
    io = IOBuffer()
    for _ in 1:kLoops
        print(io, kTestText)
    end
    # take! hands back the bytes written so far; String wraps them as text.
    return String(take!(io))
end
"""
    vectorString()

Build the result by pushing `kTestText` onto an untyped (`Any`) vector
`kLoops` times, then joining the pieces with an empty separator.

Returns the concatenated `String`.
"""
function vectorString()
    # Deliberately untyped: this is the "plain Vector" variant of the comparison.
    parts = Any[]
    for _ in 1:kLoops
        push!(parts, kTestText)
    end
    return join(parts, "")
end
"""
    typedVectorString()

Build the result by pushing `kTestText` onto a vector whose element type is
`AbstractString`, `kLoops` times, then joining the pieces with an empty
separator.

Returns the concatenated `String`.
"""
function typedVectorString()
    # Element type is the abstract `AbstractString`, exactly as in the comparison.
    parts = AbstractString[]
    for _ in 1:kLoops
        push!(parts, kTestText)
    end
    return join(parts, "")
end
# Benchmark driver: for each strategy, print a header line and run the function
# once under @timev, which reports elapsed time and allocation statistics.
# NOTE(review): every function is timed on its first call, so the figures
# presumably include JIT compilation of that function — confirm, or add an
# untimed warm-up call before measuring.
println("*** stringString")
@timev stringString()
# Two-second pause between measurements.
# NOTE(review): presumably to let the system settle between runs — confirm.
sleep(2)
println("\n*** bufferString")
@timev bufferString()
sleep(2)
println("\n*** vectorString")
@timev vectorString()
sleep(2)
println("\n*** typedVectorString")
@timev typedVectorString()
*** stringString
1.100197 seconds (21.24 k allocations: 1.725 GiB, 15.94% gc time)
elapsed time (ns): 1100197167
gc time (ns): 175349222
bytes allocated: 1851904041
pool allocs: 11292
non-pool GC allocs:9950
GC pauses: 79
*** bufferString
0.006864 seconds (11.80 k allocations: 1.134 MiB)
elapsed time (ns): 6864042
bytes allocated: 1189493
pool allocs: 11794
non-pool GC allocs:3
realloc() calls: 8
*** vectorString
0.017380 seconds (26.68 k allocations: 2.191 MiB)
elapsed time (ns): 17380237
bytes allocated: 2297091
pool allocs: 26659
non-pool GC allocs:9
realloc() calls: 8
*** typedVectorString
0.031384 seconds (44.75 k allocations: 2.999 MiB, 10.00% gc time)
elapsed time (ns): 31384383
gc time (ns): 3137654
bytes allocated: 3144221
pool allocs: 44730
non-pool GC allocs:8
realloc() calls: 8
GC pauses: 1
Well, there's me told off. I expected #1 typedVector, #2 vector, #3 buffer, then stringString way at the bottom. Instead the first 3 are reversed.
IOBuffer, as ugly as it is, is the clear winner. Vector did OK, but it loses by using about twice as much CPU and RAM. Amusingly, typedVector is roughly twice as slow and more memory-heavy than the untyped version (explained elsewhere). On larger loops, buffer gets slower, but vector remains a memory pig, and with a garbage collector in play that's unacceptable. Of course stringString is terrible, and it's almost exactly the same for `string(s, kTestText)`.
Time to rewrite some text processing.