• Bitfield for more effiecient data structure

    From Bezoar@21:1/5 to All on Mon Mar 21 08:35:57 2022
    Buried down as a reply I did not know anyone had seen this so posting this at the top.

    You can use a bitfield to hold the values. A bitfield basically holds a 1 or 0 at an index. So the string "001001" as an example would have a value at 2 and 5. So just using a string you can get ( assuming one byte per character and storing a bit string
    for each value and each attempt , in your case : 2-12 or 10 possible dice values and 7 attempts we get 70 bit strings ) this gives us 25M x 70 or 1750M or 1.75 Gig . But wait we know that a byte is made up of 1's and 0's so each one byte character can
    hold 8 values. So using bitwise operators we can cut the memory usage by 8 times or 1750 / 8 = 218.75M! which is significantly smaller than 9 G. But wait! we can also "run length encode" the string where any length of ones and zeros can be compressed to
    a number and a value. For example, a string of all 1's 32 long can be compressed to "32 1" so we can do the same and depending on your data can reduce the memory significantly. In the implementation below ( I have objectified what I borrowed from the tcl
    wiki ) we don't implement the run length encoding ; we leave that to the student as they say. I made timings for the implmentation below .

    tries attempts memory real
    -------- -------- -------- --------
    1000 7 7k 0.078s
    10000 7 70k 0.382s
    100000 7 700k 3.770s
    1000000 7 7M 79.58s

    I would expect that for a 10 times increase in tries we'd get a 10 times increase in time spent but as we see after 100000 the time needed to seek to the correct spot causes significant slow down. To speed it up one may want to split up the string into
    several smaller strings especially when you pre-allocate then its just math to get the correct index into the correct string to update the value.

    oo::class create Bitfield {
    # get and set taken from https://wiki.tcl-lang.org/page/bitstrings
    variable data
    variable maxposition
    variable lastfree

    constructor { { numbits -1 } } {
    set data ""
    set maxposition $numbits
    set lastfree 0
    if { $numbits >= 0 } {
    # prealloc space
    set numbytes [ expr { $numbits / 8 + (( $numbits % 8 ) ? 1 : 0 ) } ]
    set data [ string repeat "\0" $numbytes ]
    set maxposition [ expr $numbytes * 8 ]
    }
    }
    method alloc { } {
    set pos $lastfree
    try {
    while { [ my get $pos ] == 1 && ( $maxposition == -1 || $pos < $maxposition ) } {
    incr pos
    }
    } trap BAD_INDEX { a } {
    throw CAPACITY_ERROR " unable to allocate any more blocks!"
    }

    set lastfree [ expr $pos + 1 ]
    while { [ my get $lastfree ] && $lastfree != $maxposition } { incr lastfree } my set $pos 1
    #puts "allocated block $pos"
    return $pos
    }

    method free { pos } {
    my set $pos 0
    set lastfree $pos
    }

    method set { position bit } {
    #puts "Bitfield: set $position -> $bit "
    if { (! [string is integer $position]) || ($position < 0) } {
    throw BAD_INDEX "position must be an integer >= 0"
    }
    if { ($bit ne "0") && ($bit ne "1") } {
    throw BAD_VALUE "bit must be '0' or '1'"
    }
    set block [ expr { $position / 8 } ]
    set bitnum [ expr { $position % 8 } ]
    set byteval "00000000"
    binary scan $data @${block}b8 byteval

    # Compute new string of zeros and ones
    set newbyte [string range $byteval 0 [expr $bitnum-1]]$bit[string range $byteval [expr $bitnum+1] end]

    # Convert back into binary format
    set data [binary format a*@${block}b8 $data $newbyte]
    }

    method toggle { position } {
    set value [ my get $position ]
    my set $position [expr { ( $value + 1 ) % 2 } ]
    }
    method get { position } {

    if { (! [string is integer $position]) || ($position < 0) } {
    throw BAD_INDEX error "position must be an integer >= 0"
    }
    if { $maxposition >= 0 && $position >= $maxposition } {
    throw BAD_INDEX "position must be an integer < $maxposition"
    }
    # Compute byte and bit offsets
    set block [ expr { $position / 8 } ]
    set bitnum [expr $position % 8]

    # Scan the btye from the string
    # Default value is used if scan fails
    set byteval "00000000"
    binary scan $data @${block}b8 byteval
    # return the interesting bit
    set value [string range $byteval $bitnum $bitnum]
    return $value
    }
    method setData { buffer } {
    set data $buffer
    }
    method getData { } {
    return $data
    }
    method length { } {
    return [string length $data];
    }
    method capacity { } {
    return [expr { [string length $data ] * 8 } ]
    }
    method toString { } {
    set retval "[format "%10i " [my offset]]"
    foreach c [split $data "" ] {
    binary scan $c b8 x
    append retval "$x"
    }
    return $retval
    }
    method offset {} {
    return [ expr {[my length] + 11 }]
    }
    destructor {}
    }
    package provide Bitfield 1.0

    if { 0 } {
    set b [ Bitfield new ]
    $b set 4 1
    $b set 5 1
    $b toggle 6
    $b toggle 4
    $b set 25 1
    for { set i 0 } { $i < 30 } { incr i } {
    puts "$i . '[$b get $i ]'"
    }
    puts "length : [$b length ]"
    puts "capacity : [$b capacity ]"
    }
    # your code starts here
    package require math ; # from tcllib
    set nrOfTries 10000

    set nrOfRolls 7
    array set diceRoll {}
    for {set j 0} {$j < $nrOfRolls} {incr j} {
    for {set k 2 } { $k < 13 } { incr k } {
    set diceRoll($j,$k) [Bitfield new ${nrOfTries} ]
    }
    }
    proc rolls { trynum numRolls } {
    for {set i 0} {$i < $numRolls} {incr i} {
    set x [::math::random 1 $numRolls]
    incr x [::math::random 1 $numRolls]
    $::diceRoll($i,$x) set $trynum 1
    }
    }
    for {set j 0 } {$j < $::nrOfTries} {incr j} {
    rolls $j $::nrOfRolls
    if { [ expr { $j % 10000 } ] == 0 } {
    puts "$j"
    }
    }
    proc getRolls { tryNum } {
    set val {}
    for {set j 0} {$j < $::nrOfRolls } {incr j} {
    for {set k 2 } { $k < 13 } { incr k } {
    if { [$::diceRoll($j,$k) get $tryNum ] eq "1" } {
    lappend val $k
    }
    }
    }
    return $val
    }
    proc numTimes { value attempt } {
    lassign [$::diceRoll($attempt,$value) toString] capacityLen bitStr
    return [string length [string map { "0" "" } $bitStr ]]
    }
    puts "What were the rolls for 25th try?"
    puts "[join [getRolls 24] \n]"
    puts "How many times did 10 get rolled on the each attempt in $::nrOfTries tries?"
    puts " Attempt Num Times "
    puts " ======== =========="
    for { set m 0 } { $m < 7 } { incr m } {
    puts [format "% 7i % 5i" [expr { $m + 1 } ] [numTimes 10 $m ]]
    }

    Output :
    Forgot the output from the program:
    What were the rolls for 25 time?
    4
    5
    10
    6
    8
    11
    2
    How many times did 10 get rolled on the each attempt in 10000 tries?
    Attempt Num Times
    ======== ==========
    1 866
    2 858
    3 828
    4 784
    5 873
    6 862
    7 798

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)