gguf header loading

2025-01-07 13:03:58 -05:00 · 2025-01-07 13:03:58 -05:00 · 56594eb667
commit 56594eb667
parent 8e5546d772
10 changed files with 1251 additions and 0 deletions
--- a/llama.log
+++ b/llama.log
@ -0,0 +1 @@
 [1736213723] warming up the model with an empty run
--- a/src/main/java/org/studiorailgun/Main.java
+++ b/src/main/java/org/studiorailgun/Main.java
@ -1,6 +1,16 @@
 package org.studiorailgun;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
 import org.studiorailgun.interact.GameLoop;
 import org.studiorailgun.kobold.gguf.GGUFHeader;
 import org.studiorailgun.kobold.gguf.GGUFLoader;
 import org.studiorailgun.kobold.gguf.GGUFMetadataKV;
 import org.studiorailgun.kobold.gguf.GGUFModel;
 import org.studiorailgun.kobold.gguf.GGUFTensorInfo;
 /**
 * The main class
@ -11,6 +21,25 @@ public class Main {
     * The main method
     */
    public static void main(String[] args){
        //The model to inspect may be supplied as the first command line argument.
        //Without one, fall back to the original development-machine location so existing launches behave the same.
        String modelPath = "C:\\Users\\satellite\\Documents\\ai\\koboldcpp\\Fimbulvetr-Kuro-Lotus-10.7B-Q6_K.gguf";
        if(args != null && args.length > 0){
            modelPath = args[0];
        }
        try {
            GGUFLoader loader = new GGUFLoader();
            GGUFModel model = loader.load(modelPath);
            //dump every metadata pair as "key - type - value"
            System.out.println("Metadata: ");
            GGUFHeader header = model.getHeader();
            for(GGUFMetadataKV pair : header.getMetadataPairs()){
                System.out.println(pair.getKey() + " - " + pair.getValue_type() + " - " + pair.getValue());
            }
            System.out.println("\n");
            //dump every tensor as "name - type - offset"
            for(GGUFTensorInfo tensorInfo : model.getTensorInfos()){
                System.out.println(tensorInfo.getName() + " - " + tensorInfo.getType() + " - " + tensorInfo.getOffset());
            }
            //a successful dump ends the process here, so the game loop below only runs when loading failed
            System.exit(0);
        } catch (IOException e) {
            e.printStackTrace();
        }
        GameLoop.main();
    }
--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFHeader.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFHeader.java
@ -0,0 +1,43 @@
 package org.studiorailgun.kobold.gguf;
 import java.util.LinkedList;
 import java.util.List;
 /**
 * The header of a gguf file: the fixed-size leading fields plus the metadata key-value pairs
 */
 public class GGUFHeader {

    /**
     * The magic number read from the start of the file
     */
    protected int magic;

    /**
     * The format version read from the file
     */
    protected int version;

    /**
     * The number of tensors declared by the file
     */
    protected long tensorCount;

    /**
     * The number of metadata key-value pairs declared by the file
     */
    protected long metadataKVCount;

    /**
     * The metadata key-value pairs, in the order they were added
     */
    protected List<GGUFMetadataKV> metadataPairs = new LinkedList<>();

    /**
     * Gets the magic number
     * @return The magic number
     */
    public int getMagic() {
        return this.magic;
    }

    /**
     * Gets the format version
     * @return The version
     */
    public int getVersion() {
        return this.version;
    }

    /**
     * Gets the declared number of tensors
     * @return The tensor count
     */
    public long getTensorCount() {
        return this.tensorCount;
    }

    /**
     * Gets the declared number of metadata key-value pairs
     * @return The metadata pair count
     */
    public long getMetadataKVCount() {
        return this.metadataKVCount;
    }

    /**
     * Gets the metadata key-value pairs
     * @return The live list of pairs (not a copy)
     */
    public List<GGUFMetadataKV> getMetadataPairs() {
        return this.metadataPairs;
    }

 }
--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFLoader.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFLoader.java
@ -0,0 +1,415 @@
 package org.studiorailgun.kobold.gguf;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import org.studiorailgun.kobold.gguf.GGUFMetadataKV.GGUFMetadataValue;
 /**
 * Loads a gguf format model.
 * <p>
 * Every multi-byte value is decoded as little-endian. The loader counts the bytes it has
 * consumed so that it can skip the alignment padding between the tensor infos and the
 * tensor data, and so that it can seek forward to each tensor's offset.
 * </p>
 */
 public class GGUFLoader {

    //gguf core explanation: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md

    /**
     * The default alignment for the file, in bytes
     */
    static final int DEFAULT_ALIGNMENT = 32;

    /**
     * The expected magic number: the ascii bytes "GGUF" decoded as a little-endian int
     */
    static final int GGUF_MAGIC = 0x46554747;

    /**
     * The metadata key that overrides the default alignment
     */
    static final String ALIGNMENT_KEY = "general.alignment";

    /**
     * Used for converting bytes read in to little endian
     */
    ByteBuffer readerBuff = ByteBuffer.allocate(8);

    /**
     * The alignment of the file, in bytes
     */
    int alignment = DEFAULT_ALIGNMENT;

    /**
     * The number of bytes read -- used for keeping track of alignment
     */
    long bytesRead = 0;

    /**
     * The position (in bytes from the start of the file) where the tensor data begins.
     * Tensor offsets are relative to this position, not to the start of the file.
     */
    long tensorDataStart = 0;

    /**
     * Constructor
     */
    public GGUFLoader(){
        readerBuff.order(ByteOrder.LITTLE_ENDIAN);
    }

    /**
     * Loads the gguf model at the specified path
     * @param path The path
     * @return The model. The tensor data list currently holds one null entry per tensor, see readTensorData
     * @throws IOException Thrown if the file cannot be read, ends early, or is not a well formed gguf file
     */
    public GGUFModel load(String path) throws IOException {
        //reset per-file state so one loader can be used for several files
        this.bytesRead = 0;
        this.alignment = DEFAULT_ALIGNMENT;
        this.tensorDataStart = 0;

        //try-with-resources closes the file even when parsing fails part way through;
        //buffering avoids a system call for each of the many small reads
        try(InputStream stream = new java.io.BufferedInputStream(Files.newInputStream(new File(path).toPath()))){
            GGUFModel rVal = new GGUFModel();

            //read the header
            rVal.header = this.readHeader(stream);
            this.alignment = this.resolveAlignment(rVal.header);

            //read the tensor info
            for(long i = 0; i < rVal.header.tensorCount; i++){
                rVal.tensorInfos.add(this.readTensorInfo(stream));
            }

            //read padding -- the alignment is already expressed in bytes.
            //A short skip is tolerated here so a file that ends right after its tensor infos still loads.
            long padding = (this.alignment - (this.bytesRead % this.alignment)) % this.alignment;
            this.skipBytes(stream, padding);
            this.tensorDataStart = this.bytesRead;

            //read the tensor data
            for(GGUFTensorInfo tensorInfo : rVal.tensorInfos){
                rVal.tensorData.add(this.readTensorData(stream, tensorInfo));
            }
            return rVal;
        }
    }

    /**
     * Reads a GGUF header from an input stream
     * @param stream The stream
     * @return The header
     * @throws IOException Thrown if the stream fails to read or does not start with the gguf magic number
     */
    private GGUFHeader readHeader(InputStream stream) throws IOException {
        GGUFHeader rVal = new GGUFHeader();
        rVal.magic = this.streamReadInt(stream);
        if(rVal.magic != GGUF_MAGIC){
            //fail early instead of interpreting arbitrary bytes as huge counts and lengths
            throw new IOException("Not a gguf file -- unexpected magic number! " + rVal.magic);
        }
        rVal.version = this.streamReadInt(stream);
        rVal.tensorCount = this.streamReadLong(stream);
        rVal.metadataKVCount = this.streamReadLong(stream);
        System.out.println(rVal.magic + " " + rVal.version + " " + rVal.tensorCount + " " + rVal.metadataKVCount);
        //read in the metadata kv pairs
        for(long i = 0; i < rVal.metadataKVCount; i++){
            rVal.metadataPairs.add(this.readMetadataPair(stream));
        }
        return rVal;
    }

    /**
     * Determines the alignment to use for a file from its metadata
     * @param header The header that has already been read
     * @return The positive value stored under "general.alignment" if present, otherwise the default alignment
     */
    private int resolveAlignment(GGUFHeader header){
        for(GGUFMetadataKV pair : header.metadataPairs){
            //32 bit metadata values are stored boxed as Integer by readMetadataPair
            if(ALIGNMENT_KEY.equals(pair.key) && pair.value instanceof Integer){
                int declared = (Integer)pair.value;
                if(declared > 0){
                    return declared;
                }
            }
        }
        return DEFAULT_ALIGNMENT;
    }

    /**
     * Reads in a GGUF metadata pair
     * @param stream The stream
     * @return The pair
     * @throws IOException Thrown if the stream fails to read or the value type id is unknown
     */
    private GGUFMetadataKV readMetadataPair(InputStream stream) throws IOException {
        GGUFMetadataKV pair = new GGUFMetadataKV();
        pair.key = this.readGGUFString(stream);
        int typeRaw = this.streamReadInt(stream);
        switch(typeRaw){
            case 0: {
                pair.value_type = GGUFMetadataValue.UINT8;
                pair.value = this.streamReadChar(stream);
            } break;
            case 1: {
                //stored as the raw unsigned byte value, same as UINT8
                pair.value_type = GGUFMetadataValue.INT8;
                pair.value = this.streamReadChar(stream);
            } break;
            case 2: {
                pair.value_type = GGUFMetadataValue.UINT16;
                pair.value = this.streamReadShort(stream);
            } break;
            case 3: {
                pair.value_type = GGUFMetadataValue.INT16;
                pair.value = this.streamReadShort(stream);
            } break;
            case 4: {
                pair.value_type = GGUFMetadataValue.UINT32;
                pair.value = this.streamReadInt(stream);
            } break;
            case 5: {
                pair.value_type = GGUFMetadataValue.INT32;
                pair.value = this.streamReadInt(stream);
            } break;
            case 6: {
                pair.value_type = GGUFMetadataValue.FLOAT32;
                pair.value = this.streamReadFloat(stream);
            } break;
            case 7: {
                pair.value_type = GGUFMetadataValue.BOOL;
                pair.value = this.streamReadChar(stream);
            } break;
            case 8: {
                pair.value_type = GGUFMetadataValue.STRING;
                pair.value = this.readGGUFString(stream);
            } break;
            case 9: {
                pair.value_type = GGUFMetadataValue.ARRAY;
                pair.value = this.readMetadataArray(stream);
            } break;
            case 10: {
                pair.value_type = GGUFMetadataValue.UINT64;
                pair.value = this.streamReadLong(stream);
            } break;
            case 11: {
                pair.value_type = GGUFMetadataValue.INT64;
                pair.value = this.streamReadLong(stream);
            } break;
            case 12: {
                pair.value_type = GGUFMetadataValue.DOUBLE;
                pair.value = this.streamReadDouble(stream);
            } break;
            default: {
                //a bad type id is a problem with the file, so report it through the declared exception
                throw new IOException("Invalid metadata pair data type! " + typeRaw);
            }
        }
        return pair;
    }

    /**
     * Reads a metadata array value: an element type id, an element count, then the elements
     * @param stream The stream
     * @return A primitive, String, or (for nested arrays) Object array holding the elements
     * @throws IOException Thrown if the stream has an io exception, the length is unsupported, or the type id is unknown
     */
    private Object readMetadataArray(InputStream stream) throws IOException {
        int typeRaw = this.streamReadInt(stream);
        int len = this.readLength(stream, "metadata array");
        switch(typeRaw){
            case 0:
            case 1: {
                char[] rVal = new char[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadChar(stream);
                }
                return rVal;
            }
            case 2:
            case 3: {
                short[] rVal = new short[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadShort(stream);
                }
                return rVal;
            }
            case 4:
            case 5: {
                int[] rVal = new int[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadInt(stream);
                }
                return rVal;
            }
            case 6: {
                float[] rVal = new float[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadFloat(stream);
                }
                return rVal;
            }
            case 7: {
                boolean[] rVal = new boolean[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadChar(stream) > 0;
                }
                return rVal;
            }
            case 8: {
                String[] rVal = new String[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.readGGUFString(stream);
                }
                return rVal;
            }
            case 9: {
                Object[] rVal = new Object[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.readMetadataArray(stream);
                }
                return rVal;
            }
            case 10:
            case 11: {
                long[] rVal = new long[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadLong(stream);
                }
                return rVal;
            }
            case 12: {
                double[] rVal = new double[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadDouble(stream);
                }
                return rVal;
            }
            default: {
                throw new IOException("Failed to read metadata array -- invalid type! " + typeRaw);
            }
        }
    }

    /**
     * Reads info on a tensor from the file
     * @param stream The stream
     * @return The tensor info
     * @throws IOException Thrown if the stream fails to read or the dimension count is negative
     */
    private GGUFTensorInfo readTensorInfo(InputStream stream) throws IOException {
        GGUFTensorInfo rVal = new GGUFTensorInfo();
        rVal.name = this.readGGUFString(stream);
        rVal.nDimensions = this.streamReadInt(stream);
        if(rVal.nDimensions < 0){
            throw new IOException("Invalid tensor dimension count! " + rVal.nDimensions);
        }
        rVal.dimensions = new long[rVal.nDimensions];
        for(int i = 0; i < rVal.nDimensions; i++){
            rVal.dimensions[i] = this.streamReadLong(stream);
        }
        rVal.type = GGUFTensorInfo.getType(this.streamReadInt(stream));
        rVal.offset = this.streamReadLong(stream);
        return rVal;
    }

    /**
     * Advances the stream to the start of a tensor's data.
     * Reading the data itself is not implemented yet, so this always returns null.
     * @param stream The stream
     * @param tensorInfo The tensor metadata
     * @return Currently always null
     * @throws IOException Thrown if the stream fails to read or ends before the tensor's offset
     */
    private ByteBuffer readTensorData(InputStream stream, GGUFTensorInfo tensorInfo) throws IOException {
        //read up to the offset -- the offset is relative to the start of the tensor data section
        long gap = (this.tensorDataStart + tensorInfo.getOffset()) - this.bytesRead;
        if(gap > 0 && this.skipBytes(stream, gap) < gap){
            throw new java.io.EOFException("File ended before the data of tensor " + tensorInfo.getName());
        }
        float bitsPerWeight = GGUFTensorInfo.getUnitSize(tensorInfo.getType());
        System.out.println("Bits per weight: " + bitsPerWeight);
        //TODO: read the tensor's bytes once the per-type data size is available
        return null;
    }

    /**
     * Skips forward in the stream, keeping the byte counter in sync
     * @param stream The stream
     * @param count The number of bytes to skip
     * @return The number of bytes actually skipped -- less than count only if the stream ended
     * @throws IOException Thrown if the stream fails to read
     */
    private long skipBytes(InputStream stream, long count) throws IOException {
        long skippedTotal = 0;
        while(skippedTotal < count){
            long skipped = stream.skip(count - skippedTotal);
            if(skipped <= 0){
                //skip may return 0 without being at the end of the stream -- probe with a real read
                if(stream.read() < 0){
                    break;
                }
                skipped = 1;
            }
            skippedTotal += skipped;
        }
        this.bytesRead += skippedTotal;
        return skippedTotal;
    }

    /**
     * Reads exactly the requested number of bytes into the conversion buffer
     * @param stream The stream
     * @param count The number of bytes to read (at most the capacity of the conversion buffer)
     * @return The conversion buffer, positioned at the first of the freshly read bytes
     * @throws IOException Thrown if the stream fails to read or ends before count bytes were available
     */
    private ByteBuffer fillReaderBuff(InputStream stream, int count) throws IOException {
        byte[] bytes = stream.readNBytes(count);
        if(bytes.length != count){
            //never decode a value from a partial read mixed with stale buffer contents
            throw new java.io.EOFException("Unexpected end of file -- wanted " + count + " bytes but got " + bytes.length);
        }
        readerBuff.clear();
        readerBuff.put(bytes);
        readerBuff.flip();
        this.bytesRead += count;
        return readerBuff;
    }

    /**
     * Reads a 64 bit length and checks that it can be used as a Java array size
     * @param stream The stream
     * @param what A description of what is being measured, used in the error message
     * @return The length as an int
     * @throws IOException Thrown if the stream fails to read or the length is negative or larger than an int
     */
    private int readLength(InputStream stream, String what) throws IOException {
        long length = this.streamReadLong(stream);
        if(length < 0 || length > Integer.MAX_VALUE){
            throw new IOException("Unsupported " + what + " length! " + length);
        }
        return (int)length;
    }

    /**
     * Reads a single byte from the stream as an unsigned value
     * @param stream The stream
     * @return The byte widened to a char in the range 0-255
     * @throws IOException Thrown if the stream fails to read
     */
    private char streamReadChar(InputStream stream) throws IOException {
        //only one byte is on disk -- decoding a 2 byte char here would pull in a stale byte from the previous read
        return (char)(this.fillReaderBuff(stream, 1).get() & 0xFF);
    }

    /**
     * Reads a little-endian short from the stream
     * @param stream The stream
     * @return The short
     * @throws IOException Thrown if the stream fails to read
     */
    private short streamReadShort(InputStream stream) throws IOException {
        return this.fillReaderBuff(stream, 2).getShort();
    }

    /**
     * Reads a little-endian int from the stream
     * @param stream The stream
     * @return The int
     * @throws IOException Thrown if the stream fails to read
     */
    private int streamReadInt(InputStream stream) throws IOException {
        return this.fillReaderBuff(stream, 4).getInt();
    }

    /**
     * Reads a little-endian float from the stream
     * @param stream The stream
     * @return The float
     * @throws IOException Thrown if the stream fails to read
     */
    private float streamReadFloat(InputStream stream) throws IOException {
        return this.fillReaderBuff(stream, 4).getFloat();
    }

    /**
     * Reads a little-endian long from the stream
     * @param stream The stream
     * @return The long
     * @throws IOException Thrown if the stream fails to read
     */
    private long streamReadLong(InputStream stream) throws IOException {
        return this.fillReaderBuff(stream, 8).getLong();
    }

    /**
     * Reads a little-endian double from the stream
     * @param stream The stream
     * @return The double
     * @throws IOException Thrown if the stream fails to read
     */
    private double streamReadDouble(InputStream stream) throws IOException {
        return this.fillReaderBuff(stream, 8).getDouble();
    }

    /**
     * Reads a GGUF-format string: a 64 bit length followed by that many bytes of UTF-8
     * @param stream The stream
     * @return The Java string containing the data from the GGUF-format string
     * @throws IOException Thrown if the stream fails to read, ends early, or the length is unsupported
     */
    private String readGGUFString(InputStream stream) throws IOException {
        int length = this.readLength(stream, "string");
        byte[] bytes = stream.readNBytes(length);
        if(bytes.length != length){
            throw new java.io.EOFException("Unexpected end of file -- wanted " + length + " string bytes but got " + bytes.length);
        }
        this.bytesRead += length;
        return new String(bytes, StandardCharsets.UTF_8);
    }

 }
--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFMetadataKV.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFMetadataKV.java
@ -0,0 +1,57 @@
 package org.studiorailgun.kobold.gguf;
 /**
 * A single metadata entry: a string key, the declared type of the value, and the value itself
 */
 public class GGUFMetadataKV {

    /**
     * Metadata value types, declared in the order of their on-disk type ids (0 through 12)
     */
    public enum GGUFMetadataValue {
        UINT8, INT8,
        UINT16, INT16,
        UINT32, INT32,
        FLOAT32,
        BOOL,
        STRING,
        ARRAY,
        UINT64, INT64,
        DOUBLE,
    }

    /**
     * The key for the metadata value
     */
    protected String key;

    /**
     * The type of the value (stored as an int on disk)
     */
    protected GGUFMetadataValue value_type;

    /**
     * The actual value
     */
    protected Object value;

    /**
     * Gets the key
     * @return The key
     */
    public String getKey() {
        return this.key;
    }

    /**
     * Gets the declared type of the value
     * @return The value type
     */
    public GGUFMetadataValue getValue_type() {
        return this.value_type;
    }

    /**
     * Gets the value
     * @return The value
     */
    public Object getValue() {
        return this.value;
    }

 }
--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFModel.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFModel.java
@ -0,0 +1,51 @@
 package org.studiorailgun.kobold.gguf;
 import java.nio.ByteBuffer;
 import java.util.LinkedList;
 import java.util.List;
 /**
 * A GGUF format model: the header, the per-tensor info records, and the tensor data
 */
 public class GGUFModel {

    /**
     * The header
     */
    protected GGUFHeader header;

    /**
     * The tensor info records
     */
    protected List<GGUFTensorInfo> tensorInfos = new LinkedList<>();

    /**
     * Padding to the nearest multiple of ALIGNMENT
     */
    protected char[] _padding;

    /**
     * The tensor data
     */
    protected List<ByteBuffer> tensorData = new LinkedList<>();

    /**
     * Gets the header
     * @return The header
     */
    public GGUFHeader getHeader() {
        return this.header;
    }

    /**
     * Gets the tensor info records
     * @return The live list of tensor infos (not a copy)
     */
    public List<GGUFTensorInfo> getTensorInfos() {
        return this.tensorInfos;
    }

    /**
     * Gets the padding
     * @return The padding array, or null if it was never assigned
     */
    public char[] get_padding() {
        return this._padding;
    }

    /**
     * Gets the tensor data
     * @return The live list of tensor data buffers (not a copy)
     */
    public List<ByteBuffer> getTensorData() {
        return this.tensorData;
    }

 }
--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFString.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFString.java
@ -0,0 +1,18 @@
 package org.studiorailgun.kobold.gguf;
 /**
 * A gguf format string: a length paired with the text
 */
 public class GGUFString {

    /**
     * The length of the string
     */
    protected long len;

    /**
     * The text of the string
     */
    protected String string;

 }
--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFTensorInfo.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFTensorInfo.java
@ -0,0 +1,378 @@
 package org.studiorailgun.kobold.gguf;
 /**
 * Info on a tensor in the model: its name, shape, element type, and the offset of its data
 */
 public class GGUFTensorInfo {

    /**
     * The number of weights in a super block (used by the k-quants and most i-quants)
     */
    private static final int QK_K = 256;

    /**
     * The number of weights in a block of the non-k quantized types
     */
    private static final int QK = 32;

    /**
     * GGML types
     */
    public static enum GGMLType {
        GGML_TYPE_F32,
        GGML_TYPE_F16,
        GGML_TYPE_Q4_0,
        GGML_TYPE_Q4_1,
        GGML_TYPE_Q5_0,
        GGML_TYPE_Q5_1,
        GGML_TYPE_Q8_0,
        GGML_TYPE_Q8_1,
        GGML_TYPE_Q2_K,
        GGML_TYPE_Q3_K,
        GGML_TYPE_Q4_K,
        GGML_TYPE_Q5_K,
        GGML_TYPE_Q6_K,
        GGML_TYPE_Q8_K,
        GGML_TYPE_IQ2_XXS,
        GGML_TYPE_IQ2_XS,
        GGML_TYPE_IQ3_XXS,
        GGML_TYPE_IQ1_S,
        GGML_TYPE_IQ4_NL,
        GGML_TYPE_IQ3_S,
        GGML_TYPE_IQ2_S,
        GGML_TYPE_IQ4_XS,
        GGML_TYPE_I8,
        GGML_TYPE_I16,
        GGML_TYPE_I32,
        GGML_TYPE_I64,
        GGML_TYPE_F64,
        GGML_TYPE_IQ1_M,
        GGML_TYPE_COUNT,
    }

    /**
     * The name of the tensor
     */
    protected String name;

    /**
     * The number of dimensions of the tensor
     */
    protected int nDimensions;

    /**
     * The size of each dimension
     */
    protected long[] dimensions;

    /**
     * The element type (stored as an int on disk)
     */
    protected GGMLType type;

    /**
     * The offset of the tensor's data
     */
    protected long offset;

    /**
     * Gets the name of the tensor
     * @return The name
     */
    public String getName() {
        return name;
    }

    /**
     * Gets the number of dimensions
     * @return The number of dimensions
     */
    public int getnDimensions() {
        return nDimensions;
    }

    /**
     * Gets the size of each dimension
     * @return The live dimensions array (not a copy)
     */
    public long[] getDimensions() {
        return dimensions;
    }

    /**
     * Gets the element type
     * @return The type
     */
    public GGMLType getType() {
        return type;
    }

    /**
     * Gets the offset of the tensor's data
     * @return The offset
     */
    public long getOffset() {
        return offset;
    }

    /**
     * Gets the GGML enum type from an int representation
     * @param enumVal The value represented as an int
     * @return The actual enum value, or GGML_TYPE_COUNT for any id that is not recognized
     * @throws Error Thrown for the ids 4 and 5, which belong to deprecated types
     */
    protected static GGMLType getType(int enumVal){
        switch(enumVal){
            case 0: return GGMLType.GGML_TYPE_F32;
            case 1: return GGMLType.GGML_TYPE_F16;
            case 2: return GGMLType.GGML_TYPE_Q4_0;
            case 3: return GGMLType.GGML_TYPE_Q4_1;
            case 4: throw new Error("Tensor info type of deprecated type (GGML_TYPE_Q4_2)");
            case 5: throw new Error("Tensor info type of deprecated type (GGML_TYPE_Q4_3)");
            case 6: return GGMLType.GGML_TYPE_Q5_0;
            case 7: return GGMLType.GGML_TYPE_Q5_1;
            case 8: return GGMLType.GGML_TYPE_Q8_0;
            case 9: return GGMLType.GGML_TYPE_Q8_1;
            case 10: return GGMLType.GGML_TYPE_Q2_K;
            case 11: return GGMLType.GGML_TYPE_Q3_K;
            case 12: return GGMLType.GGML_TYPE_Q4_K;
            case 13: return GGMLType.GGML_TYPE_Q5_K;
            case 14: return GGMLType.GGML_TYPE_Q6_K;
            case 15: return GGMLType.GGML_TYPE_Q8_K;
            case 16: return GGMLType.GGML_TYPE_IQ2_XXS;
            case 17: return GGMLType.GGML_TYPE_IQ2_XS;
            case 18: return GGMLType.GGML_TYPE_IQ3_XXS;
            case 19: return GGMLType.GGML_TYPE_IQ1_S;
            case 20: return GGMLType.GGML_TYPE_IQ4_NL;
            case 21: return GGMLType.GGML_TYPE_IQ3_S;
            case 22: return GGMLType.GGML_TYPE_IQ2_S;
            case 23: return GGMLType.GGML_TYPE_IQ4_XS;
            case 24: return GGMLType.GGML_TYPE_I8;
            case 25: return GGMLType.GGML_TYPE_I16;
            case 26: return GGMLType.GGML_TYPE_I32;
            case 27: return GGMLType.GGML_TYPE_I64;
            case 28: return GGMLType.GGML_TYPE_F64;
            case 29: return GGMLType.GGML_TYPE_IQ1_M;
            default: return GGMLType.GGML_TYPE_COUNT;
        }
    }

    /**
     * Gets the size in bits of the GGML type
     * @param type The type
     * @return The number of bits that type takes
     * @throws Error Thrown if the type is GGML_TYPE_COUNT
     */
    protected static float getUnitSize(GGMLType type){
        //reference: https://github.com/ggerganov/llama.cpp/wiki/Tensor-Encoding-Schemes
        //NOTE(review): the Q4_0/Q4_1/Q5_0/Q5_1/Q8_0/Q8_1/Q8_K values look like nominal quant widths that
        //leave out the per-block scale overhead -- confirm before deriving tensor byte sizes from them
        switch(type){
            case GGML_TYPE_F32: {
                return 32;
            }
            case GGML_TYPE_F16: {
                return 16;
            }
            case GGML_TYPE_Q4_1:
            case GGML_TYPE_Q4_0: {
                return 4;
            }
            case GGML_TYPE_Q5_0:
            case GGML_TYPE_Q5_1: {
                return 5;
            }
            case GGML_TYPE_Q8_0:
            case GGML_TYPE_Q8_1: {
                return 8;
            }
            case GGML_TYPE_Q2_K: {
                return 2.5625f;
            }
            case GGML_TYPE_Q3_K: {
                return 3.4375f;
            }
            case GGML_TYPE_Q4_K: {
                return 4.5f;
            }
            case GGML_TYPE_Q5_K: {
                return 5.5f;
            }
            case GGML_TYPE_Q6_K: {
                return 6.5625f;
            }
            case GGML_TYPE_Q8_K: {
                return 8;
            }
            case GGML_TYPE_IQ2_XXS: {
                return 2.0625f;
            }
            case GGML_TYPE_IQ2_XS: {
                return 2.31f;
            }
            case GGML_TYPE_IQ3_XXS: {
                return 3.0625f;
            }
            case GGML_TYPE_IQ1_S: {
                return 1.5f;
            }
            case GGML_TYPE_IQ4_NL: {
                return 4.5f;
            }
            case GGML_TYPE_IQ3_S: {
                return 3.4375f;
            }
            case GGML_TYPE_IQ2_S: {
                return 2.5f;
            }
            case GGML_TYPE_IQ4_XS: {
                return 4.25f;
            }
            case GGML_TYPE_I8: {
                return 8;
            }
            case GGML_TYPE_I16: {
                return 16;
            }
            case GGML_TYPE_I32: {
                return 32;
            }
            case GGML_TYPE_F64:
            case GGML_TYPE_I64: {
                return 64;
            }
            case GGML_TYPE_IQ1_M: {
                return 1.75f;
            }
            case GGML_TYPE_COUNT:
            default: {
                throw new Error("Undefined type!");
            }
        }
    }

    /**
     * Gets the block size of a given GGML type in bytes.
     * <p>
     * A block is the smallest unit the type is stored in: a single element for the plain
     * integer and float types, 32 weights for the Q4_0 through Q8_1 types and IQ4_NL, and a
     * 256 weight super block for the remaining quantized types. Each size below is the sum
     * of the byte sizes of the fields of the matching ggml block struct.
     * </p>
     * @param type The type
     * @return The size of a block in the tensor data of this type in bytes
     * @throws Error Thrown if the type is GGML_TYPE_COUNT
     */
    protected static float getBlockSizeInBytes(GGMLType type){
        //reference: https://github.com/ggerganov/llama.cpp/blob/master/ggml/src/ggml-common.h
        switch(type){
            //regular types
            case GGML_TYPE_I8: {
                return 1;
            }
            case GGML_TYPE_F16:
            case GGML_TYPE_I16: {
                return 2;
            }
            case GGML_TYPE_F32:
            case GGML_TYPE_I32: {
                return 4;
            }
            case GGML_TYPE_F64:
            case GGML_TYPE_I64: {
                return 8;
            }
            //32 weight blocks
            case GGML_TYPE_Q4_0: {
                //half scale + 4 bit quants
                return 2 + QK/2;
            }
            case GGML_TYPE_Q4_1: {
                //half scale + half min + 4 bit quants
                return 2 + 2 + QK/2;
            }
            case GGML_TYPE_Q5_0: {
                //half scale + 32 high bits + low 4 bit quants
                return 2 + 4 + QK/2;
            }
            case GGML_TYPE_Q5_1: {
                //half scale + half min + 32 high bits + low 4 bit quants
                return 2 + 2 + 4 + QK/2;
            }
            case GGML_TYPE_Q8_0: {
                //half scale + 8 bit quants
                return 2 + QK;
            }
            case GGML_TYPE_Q8_1: {
                //half scale + half sum + 8 bit quants
                return 2 + 2 + QK;
            }
            case GGML_TYPE_IQ4_NL: {
                //half scale + 4 bit quants
                return 2 + QK/2;
            }
            //256 weight super blocks
            case GGML_TYPE_Q2_K: {
                //scales + quants + half d + half dmin
                return QK_K/16 + QK_K/4 + 2 + 2;
            }
            case GGML_TYPE_Q3_K: {
                //high bit mask + low 2 bit quants + scales + half d
                return QK_K/8 + QK_K/4 + 12 + 2;
            }
            case GGML_TYPE_Q4_K: {
                //half d + half dmin + scales + 4 bit quants
                return 2 + 2 + 12 + QK_K/2;
            }
            case GGML_TYPE_Q5_K: {
                //half d + half dmin + scales + high bits + low 4 bit quants
                return 2 + 2 + 12 + QK_K/8 + QK_K/2;
            }
            case GGML_TYPE_Q6_K: {
                //low 4 bits + high 2 bits + scales + half d
                return QK_K/2 + QK_K/4 + QK_K/16 + 2;
            }
            case GGML_TYPE_Q8_K: {
                //float d + 8 bit quants + 16 bit group sums
                return 4 + QK_K + 2 * (QK_K/16);
            }
            case GGML_TYPE_IQ2_XXS: {
                //half d + 16 bit quants
                return 2 + 2 * (QK_K/8);
            }
            case GGML_TYPE_IQ2_XS: {
                //half d + 16 bit quants + scales
                return 2 + 2 * (QK_K/8) + QK_K/32;
            }
            case GGML_TYPE_IQ2_S: {
                //half d + quants + high bits + scales
                return 2 + QK_K/4 + QK_K/32 + QK_K/32;
            }
            case GGML_TYPE_IQ3_XXS: {
                //half d + quants
                return 2 + 3 * (QK_K/8);
            }
            case GGML_TYPE_IQ3_S: {
                //half d + quants + high bits + signs + scales
                return 2 + QK_K/4 + QK_K/32 + QK_K/8 + QK_K/64;
            }
            case GGML_TYPE_IQ1_S: {
                //half d + quants + 16 bit high bits
                return 2 + QK_K/8 + 2 * (QK_K/32);
            }
            case GGML_TYPE_IQ1_M: {
                //quants + high bits + scales
                return QK_K/8 + QK_K/16 + QK_K/32;
            }
            case GGML_TYPE_IQ4_XS: {
                //half d + 16 bit high scales + low scales + 4 bit quants
                return 2 + 2 + QK_K/64 + QK_K/2;
            }
            case GGML_TYPE_COUNT:
            default: {
                throw new Error("Undefined type!");
            }
        }
    }

 }
--- a/src/main/java/org/studiorailgun/kobold/gguf/quant/Quantization.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/quant/Quantization.java
@ -0,0 +1,245 @@
 package org.studiorailgun.kobold.gguf.quant;
 import java.nio.ByteBuffer;
 /**
 * 2-bit quanization
 */
 public class Quantization {
    //reference for superblock structs: https://github.com/byroneverson/llm.cpp/blob/master/k_quants.h
    //also maybe reference: https://github.com/ggerganov/llama.cpp/blob/master/ggml/src/ggml-common.h
    //tensor encoding scheme wiki page: https://github.com/ggerganov/llama.cpp/wiki/Tensor-Encoding-Schemes
    //thread explaining quantization scheme: https://github.com/ggerganov/llama.cpp/pull/8151
    //https://github.com/ggerganov/llama.cpp/blob/75af08c475e285888f66556d0f459c533b7deb95/ggml/src/ggml-impl.h
    /**
     * Super block size
     */
    static final int QK_K = 256;
    /**
     * Scale size
     */
    static final int K_SCALE_SIZE = 12;
    /**
     * A buffer used for type conversion
     */
    static ByteBuffer conversionBuffer = ByteBuffer.allocate(8);
    /**
     * 2-bit quantization
     */
    public static class Block_Q2_K implements Superblock {
        /**
         * scales and mins, quantized with 4 bits
         */
        char[] scales = new char[QK_K/16];
        /**
         * quants
         */
        char[] qs = new char[QK_K/4];
        /**
         * super block scale for quantized scales
         */
        short d;
        /**
         * super block scale for quantized mins
         */
        short dmin;
        /**
         * Gets the size of the superblock
         * @return The size of the superblock
         */
        public int getSize(){
            return 0;
        }
    }
    /**
     * 3-bit quantization
     */
    public static class Block_Q3_K implements Superblock {
        /**
         * quants - high bit
         */
        char[] hmask = new char[QK_K/8];
        /**
         * quants - low 2 bits
         */
        char[] qs = new char[QK_K/4];
        /**
         * scales, quantized with 6 bits
         */
        char[] scales = new char[12];
        /**
         * super block scale
         */
        short dmin;
        /**
         * Gets the size of the superblock
         * @return The size of the superblock
         */
        public int getSize(){
            return 0;
        }
    }
    /**
     * 4-bit quantization
     */
    public static class Block_Q4_K implements Superblock {
        /**
         * Super block scale for quantized scales
         */
        short d;
        /**
         * Super block scale for quantized mins
         */
        short dmin;
        /**
         * Shales and mins, quantized with 6 bits
         */
        char[] scales = new char[K_SCALE_SIZE];
        /**
         * 4-bit quants
         */
        char[] qs = new char[QK_K/2];
        /**
         * Gets the size of the superblock
         * @return The size of the superblock
         */
        public int getSize(){
            return 0;
        }
    }
    /**
     * 5-bit quantization
     */
    public static class Block_Q5_K implements Superblock {
        /**
         * Super block scale for quantized scales
         */
        short d;
        /**
         * Super block scale for quantized mins
         */
        short dmin;
        /**
         * Scales and mins, quantized with 6 bits
         */
        char[] scales = new char[K_SCALE_SIZE];
        /**
         * quants, high bit
         */
        char[] qh = new char[QK_K/8];
        /**
         * quants, low 4 bits
         */
        char[] qs = new char[QK_K/2];
        /**
         * Gets the size of the superblock
         * @return The size of the superblock
         */
        public int getSize(){
            return 0;
        }
    }
    /**
     * 6-bit quantization
     */
    public static class Block_Q6_K implements Superblock {
        /**
         * Quants, lower 4 bits
         */
        char[] ql = new char[QK_K/2];
        /**
         * Quants, upper 2 bits
         */
        char[] qh = new char[QK_K/4];
        /**
         * scales, quantized with 8 bits
         */
        char[] scales = new char[QK_K/16];
        /**
         * super block scale
         */
        short d;
        /**
         * Gets the size of the superblock
         * @return The size of the superblock
         */
        public int getSize(){
            return QK_K/2 + QK_K/4 + QK_K/16 + 2;
        }
    }
    /**
     * 8-bit quantization
     */
    public static class Block_Q8_K implements Superblock {
        /**
         * Delta
         */
        float d;
        /**
         * quants
         */
        char[] qs = new char[QK_K];
        /**
         * sum of the quants in groups of 16
         */
        short[] bsums = new short[QK_K/16];
        /**
         * Gets the size of the superblock
         * @return The size of the superblock
         */
        public int getSize(){
            return 0;
        }
    }
 }
--- a/src/main/java/org/studiorailgun/kobold/gguf/quant/Superblock.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/quant/Superblock.java
@ -0,0 +1,14 @@
 package org.studiorailgun.kobold.gguf.quant;
 /**
 * One super block of quantized weights
 */
 public interface Superblock {

    /**
     * Reports how large the super block is
     * @return The size of the superblock
     */
    int getSize();

 }
		`@ -0,0 +1 @@`
							`[1736213723] warming up the model with an empty run`