gguf header loading

2025-01-07 13:03:58 -05:00 · 2025-01-07 13:03:58 -05:00 · 56594eb667
commit 56594eb667
parent 8e5546d772
10 changed files with 1251 additions and 0 deletions
--- a/llama.log
+++ b/llama.log
@ -0,0 +1 @@
+[1736213723] warming up the model with an empty run
--- a/src/main/java/org/studiorailgun/Main.java
+++ b/src/main/java/org/studiorailgun/Main.java
@ -1,6 +1,16 @@
 package org.studiorailgun;

+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+
 import org.studiorailgun.interact.GameLoop;
+import org.studiorailgun.kobold.gguf.GGUFHeader;
+import org.studiorailgun.kobold.gguf.GGUFLoader;
+import org.studiorailgun.kobold.gguf.GGUFMetadataKV;
+import org.studiorailgun.kobold.gguf.GGUFModel;
+import org.studiorailgun.kobold.gguf.GGUFTensorInfo;

 /**
 * The main class
@ -11,6 +21,25 @@ public class Main {
     * The main method
     */
    public static void main(String[] args){
+        //model to inspect: the first command line argument when one is supplied,
+        //otherwise fall back to the original development path so existing runs behave the same
+        String modelPath = (args != null && args.length > 0)
+            ? args[0]
+            : "C:\\Users\\satellite\\Documents\\ai\\koboldcpp\\Fimbulvetr-Kuro-Lotus-10.7B-Q6_K.gguf";
+        try {
+            GGUFLoader loader = new GGUFLoader();
+            GGUFModel model = loader.load(modelPath);
+
+            //dump every metadata pair as "key - type - value"
+            System.out.println("Metadata: ");
+            GGUFHeader header = model.getHeader();
+            for(GGUFMetadataKV pair : header.getMetadataPairs()){
+                System.out.println(pair.getKey() + " - " + pair.getValue_type() + " - " + pair.getValue());
+            }
+
+            System.out.println("\n");
+
+            //dump every tensor info as "name - type - offset"
+            for(GGUFTensorInfo tensorInfo : model.getTensorInfos()){
+                System.out.println(tensorInfo.getName() + " - " + tensorInfo.getType() + " - " + tensorInfo.getOffset());
+            }
+
+            //a successful dump terminates the program here, so the game loop below only runs when loading failed
+            System.exit(0);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
        GameLoop.main();
    }

--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFHeader.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFHeader.java
@ -0,0 +1,43 @@
+package org.studiorailgun.kobold.gguf;
+
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * In-memory form of the header of a gguf file: four fixed fields followed by
+ * the metadata key-value pairs. The fields are populated by GGUFLoader.
+ */
+public class GGUFHeader {
+
+    /**
+     * The magic number (first 4 byte value read from the file)
+     */
+    protected int magic;
+
+    /**
+     * The format version (second 4 byte value read from the file)
+     */
+    protected int version;
+
+    /**
+     * The number of tensor info records that follow the header
+     */
+    protected long tensorCount;
+
+    /**
+     * The number of metadata key-value pairs stored in the header
+     */
+    protected long metadataKVCount;
+
+    /**
+     * The metadata pairs, in the order they were read
+     */
+    protected List<GGUFMetadataKV> metadataPairs = new LinkedList<>();
+
+    /**
+     * Gets the magic number
+     * @return The magic number
+     */
+    public int getMagic() {
+        return this.magic;
+    }
+
+    /**
+     * Gets the format version
+     * @return The version
+     */
+    public int getVersion() {
+        return this.version;
+    }
+
+    /**
+     * Gets the number of tensors declared by the header
+     * @return The tensor count
+     */
+    public long getTensorCount() {
+        return this.tensorCount;
+    }
+
+    /**
+     * Gets the number of metadata pairs declared by the header
+     * @return The metadata pair count
+     */
+    public long getMetadataKVCount() {
+        return this.metadataKVCount;
+    }
+
+    /**
+     * Gets the metadata pairs (the live list, not a copy)
+     * @return The metadata pairs
+     */
+    public List<GGUFMetadataKV> getMetadataPairs() {
+        return this.metadataPairs;
+    }
+
+}
--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFLoader.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFLoader.java
@ -0,0 +1,415 @@
+package org.studiorailgun.kobold.gguf;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+
+import org.studiorailgun.kobold.gguf.GGUFMetadataKV.GGUFMetadataValue;
+
+/**
+ * Loads a gguf format model
+ */
+public class GGUFLoader {
+
+    //gguf core explanation: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md
+
+    /**
+     * The default alignment for the file.
+     * Used as the initial value of {@link #alignment}.
+     */
+    static final int DEFAULT_ALIGNMENT = 32;
+
+    /**
+     * Scratch buffer used for decoding the bytes read in as little endian values.
+     * It is 8 bytes long (the widest value read is a long/double) and is shared by
+     * all of the streamRead* helpers, so every helper rewinds it before use.
+     */
+    ByteBuffer readerBuff = ByteBuffer.allocate(8);
+
+    /**
+     * The alignment of the file
+     */
+    int alignment = DEFAULT_ALIGNMENT;
+
+    /**
+     * The number of bytes read -- used for keeping track of alignment.
+     * Incremented by the read helpers as they consume bytes from the stream.
+     */
+    long bytesRead = 0;
+
+    /**
+     * Constructor -- switches the scratch buffer to little endian byte order
+     */
+    public GGUFLoader(){
+        readerBuff.order(ByteOrder.LITTLE_ENDIAN);
+    }
+    
+    /**
+     * Loads the gguf model at the specified path.
+     * <p>
+     * Reads the header, then one tensor info record per tensor declared by the header,
+     * then skips the padding that follows the tensor infos, and finally hands every
+     * tensor info to {@code readTensorData}.
+     * @param path The path
+     * @return The loaded model
+     * @throws IOException Thrown if the file cannot be opened or read
+     */
+    public GGUFModel load(String path) throws IOException {
+        //the byte counter is per-file state -- start from zero in case this loader instance is reused
+        this.bytesRead = 0;
+
+        GGUFModel rVal = new GGUFModel();
+
+        //try-with-resources closes the stream even if parsing throws part way through
+        try (InputStream stream = Files.newInputStream(new File(path).toPath())) {
+
+            //read the header
+            rVal.header = this.readHeader(stream);
+
+            //read the tensor info
+            for(long i = 0; i < rVal.header.tensorCount; i++){
+                rVal.tensorInfos.add(this.readTensorInfo(stream));
+            }
+
+            //read padding -- the alignment is a byte count, so consume bytes until the
+            //number of bytes read is a multiple of it
+            while(bytesRead % alignment != 0){
+                if(stream.read() < 0){
+                    //end of file reached before the padding was complete; nothing more to skip
+                    break;
+                }
+                bytesRead++;
+            }
+
+            //read the tensor data (iterate directly, indexed get() on a linked list is quadratic)
+            for(GGUFTensorInfo tensorInfo : rVal.tensorInfos){
+                rVal.tensorData.add(this.readTensorData(stream,tensorInfo));
+            }
+        }
+
+        return rVal;
+    }
+
+    /**
+     * Reads a GGUF header from an input stream: magic, version, tensor count,
+     * metadata pair count and then the metadata pairs themselves.
+     * <p>
+     * Also updates the loader's alignment from the {@code general.alignment}
+     * metadata key when the file provides one.
+     * @param stream The stream
+     * @return The header
+     * @throws IOException Thrown if the stream fails to read or the file does not start with the gguf magic number
+     */
+    private GGUFHeader readHeader(InputStream stream) throws IOException {
+        //every file starts from the default alignment until its metadata says otherwise
+        this.alignment = DEFAULT_ALIGNMENT;
+
+        GGUFHeader rVal = new GGUFHeader();
+        rVal.magic = this.streamReadInt(stream);
+        //a gguf file begins with the ascii bytes 'G','G','U','F'; decoded as a little endian int that is 0x46554747.
+        //fail fast here instead of interpreting an unrelated file as lengths and counts
+        if(rVal.magic != 0x46554747){
+            throw new IOException("Not a gguf file -- unexpected magic number 0x" + Integer.toHexString(rVal.magic));
+        }
+        rVal.version = this.streamReadInt(stream);
+        rVal.tensorCount = this.streamReadLong(stream);
+        rVal.metadataKVCount = this.streamReadLong(stream);
+
+        System.out.println(rVal.magic + " " + rVal.version + " " + rVal.tensorCount + " " + rVal.metadataKVCount);
+
+        //read in the metadata kv pairs
+        for(long i = 0; i < rVal.metadataKVCount; i++){
+            GGUFMetadataKV pair = this.readMetadataPair(stream);
+            rVal.metadataPairs.add(pair);
+
+            //the file may override the default alignment; only accept a positive 32 bit value
+            if("general.alignment".equals(pair.key) && pair.value instanceof Integer && ((Integer)pair.value).intValue() > 0){
+                this.alignment = ((Integer)pair.value).intValue();
+            }
+        }
+
+        return rVal;
+    }
+
+    /**
+     * Reads in a GGUF metadata pair.
+     * The pair is stored as a gguf string key, a 4 byte type code and then the value
+     * encoded according to that type code.
+     * @param stream The stream
+     * @return The pair
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private GGUFMetadataKV readMetadataPair(InputStream stream) throws IOException {
+        String key = this.readGGUFString(stream);
+        int typeRaw = this.streamReadInt(stream);
+
+        //decode into locals first, the pair object is assembled once the value is known
+        GGUFMetadataValue type;
+        Object value;
+        switch(typeRaw){
+            case 0:
+                type = GGUFMetadataValue.UINT8;
+                value = this.streamReadChar(stream);
+                break;
+            case 1:
+                type = GGUFMetadataValue.INT8;
+                value = this.streamReadChar(stream);
+                break;
+            case 2:
+                type = GGUFMetadataValue.UINT16;
+                value = this.streamReadShort(stream);
+                break;
+            case 3:
+                type = GGUFMetadataValue.INT16;
+                value = this.streamReadShort(stream);
+                break;
+            case 4:
+                type = GGUFMetadataValue.UINT32;
+                value = this.streamReadInt(stream);
+                break;
+            case 5:
+                type = GGUFMetadataValue.INT32;
+                value = this.streamReadInt(stream);
+                break;
+            case 6:
+                type = GGUFMetadataValue.FLOAT32;
+                value = this.streamReadFloat(stream);
+                break;
+            case 7:
+                //booleans are kept as the raw single byte value
+                type = GGUFMetadataValue.BOOL;
+                value = this.streamReadChar(stream);
+                break;
+            case 8:
+                type = GGUFMetadataValue.STRING;
+                value = this.readGGUFString(stream);
+                break;
+            case 9:
+                type = GGUFMetadataValue.ARRAY;
+                value = this.readMetadataArray(stream);
+                break;
+            case 10:
+                type = GGUFMetadataValue.UINT64;
+                value = this.streamReadLong(stream);
+                break;
+            case 11:
+                type = GGUFMetadataValue.INT64;
+                value = this.streamReadLong(stream);
+                break;
+            case 12:
+                type = GGUFMetadataValue.DOUBLE;
+                value = this.streamReadDouble(stream);
+                break;
+            default:
+                throw new Error("Invalid metadata pair data type! " + typeRaw);
+        }
+
+        GGUFMetadataKV pair = new GGUFMetadataKV();
+        pair.key = key;
+        pair.value_type = type;
+        pair.value = value;
+        return pair;
+    }
+
+    /**
+     * Reads a metadata array value.
+     * The array is stored as a 4 byte element type code, an 8 byte element count and
+     * then the elements themselves. The returned object is a primitive array matching
+     * the element type (char[] for 8 bit values, boolean[] for bools), a String[] for
+     * strings, or an Object[] of nested arrays.
+     * @param stream The stream
+     * @return The array value
+     * @throws IOException Thrown if the stream has an io exception or the stored length cannot be held by a java array
+     */
+    private Object readMetadataArray(InputStream stream) throws IOException {
+        int typeRaw = this.streamReadInt(stream);
+
+        //the length is stored as 64 bits; reject values a java array cannot hold instead of
+        //silently truncating them with a cast
+        long rawLen = this.streamReadLong(stream);
+        if(rawLen < 0 || rawLen > Integer.MAX_VALUE){
+            throw new IOException("Metadata array length out of range: " + rawLen);
+        }
+        int len = (int)rawLen;
+
+        switch(typeRaw){
+            case 0:
+            case 1: {
+                char[] rVal = new char[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadChar(stream);
+                }
+                return rVal;
+            }
+            case 2:
+            case 3: {
+                short[] rVal = new short[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadShort(stream);
+                }
+                return rVal;
+            }
+            case 4:
+            case 5: {
+                int[] rVal = new int[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadInt(stream);
+                }
+                return rVal;
+            }
+            case 6: {
+                float[] rVal = new float[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadFloat(stream);
+                }
+                return rVal;
+            }
+            case 7: {
+                //any non-zero byte counts as true
+                boolean[] rVal = new boolean[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadChar(stream) > 0;
+                }
+                return rVal;
+            }
+            case 8: {
+                String[] rVal = new String[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.readGGUFString(stream);
+                }
+                return rVal;
+            }
+            case 9: {
+                //nested arrays carry their own type code and length
+                Object[] rVal = new Object[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.readMetadataArray(stream);
+                }
+                return rVal;
+            }
+            case 10:
+            case 11: {
+                long[] rVal = new long[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadLong(stream);
+                }
+                return rVal;
+            }
+            case 12: {
+                double[] rVal = new double[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadDouble(stream);
+                }
+                return rVal;
+            }
+            default: {
+                throw new Error("Failed to read metadata array -- invalid type! " + typeRaw);
+            }
+        }
+    }
+
+    /**
+     * Reads info on a tensor from the file.
+     * The record is read in the order name, dimension count, one 8 byte extent per
+     * dimension, 4 byte type code, 8 byte offset.
+     * @param stream The stream
+     * @return The tensor info
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private GGUFTensorInfo readTensorInfo(InputStream stream) throws IOException {
+        GGUFTensorInfo info = new GGUFTensorInfo();
+
+        info.name = this.readGGUFString(stream);
+
+        //the dimension count decides how many extents follow
+        int dimCount = this.streamReadInt(stream);
+        info.nDimensions = dimCount;
+        long[] extents = new long[dimCount];
+        int dim = 0;
+        while(dim < dimCount){
+            extents[dim] = this.streamReadLong(stream);
+            dim++;
+        }
+        info.dimensions = extents;
+
+        //translate the stored type code to the enum
+        int typeCode = this.streamReadInt(stream);
+        info.type = GGUFTensorInfo.getType(typeCode);
+
+        info.offset = this.streamReadLong(stream);
+
+        return info;
+    }
+
+    /**
+     * Reads the binary tensor data for a given tensor info.
+     * Currently this only advances the stream to the tensor's offset and prints the
+     * bits-per-weight of its type; no data is returned yet.
+     * @param stream The stream
+     * @param tensorInfo The tensor metadata
+     * @return The binary tensor data (always null at present)
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private ByteBuffer readTensorData(InputStream stream, GGUFTensorInfo tensorInfo) throws IOException {
+        //consume single bytes until the running byte count reaches the tensor's offset
+        //NOTE(review): bytesRead counts from the start of the file -- confirm whether the stored
+        //offset is measured from the start of the file or from the start of the tensor data section
+        for(long target = tensorInfo.getOffset(); this.bytesRead < target; this.bytesRead++){
+            stream.read();
+        }
+
+        //element count is the product of all dimension extents (computed but not used yet)
+        long elementCount = 1;
+        int dimCount = tensorInfo.getnDimensions();
+        for(int dim = 0; dim < dimCount; dim++){
+            elementCount *= tensorInfo.getDimensions()[dim];
+        }
+
+        float unitBits = GGUFTensorInfo.getUnitSize(tensorInfo.getType());
+        System.out.println("Bits per weight: " + unitBits);
+
+        return null;
+    }
+
+    /**
+     * Reads a single byte from the stream and returns it as a char in the range 0-255
+     * @param stream The stream
+     * @return The char
+     * @throws IOException Thrown if the stream fails to read or ends before the byte could be read
+     */
+    private char streamReadChar(InputStream stream) throws IOException {
+        byte[] bytes = stream.readNBytes(1);
+        if(bytes.length < 1){
+            throw new IOException("Unexpected end of stream while reading a 1 byte value");
+        }
+        this.bytesRead += 1;
+        //widen the byte directly: decoding a 16 bit char out of the shared 8 byte buffer would combine
+        //this byte with a stale second byte left over from whatever was read previously
+        return (char)(bytes[0] & 0xFF);
+    }
+
+    /**
+     * Reads a little-endian short from the stream
+     * @param stream The stream
+     * @return The short
+     * @throws IOException Thrown if the stream fails to read or ends before 2 bytes could be read
+     */
+    private short streamReadShort(InputStream stream) throws IOException {
+        byte[] bytes = stream.readNBytes(2);
+        //readNBytes returns fewer bytes at end of stream; decoding anyway would mix in stale buffer contents
+        if(bytes.length < 2){
+            throw new IOException("Unexpected end of stream while reading a 2 byte value");
+        }
+        readerBuff.position(0);
+        readerBuff.put(bytes);
+        this.bytesRead += 2;
+        //absolute read from index 0 using the buffer's little endian order
+        return readerBuff.getShort(0);
+    }
+
+    /**
+     * Reads a little-endian int from the stream
+     * @param stream The stream
+     * @return The int
+     * @throws IOException Thrown if the stream fails to read or ends before 4 bytes could be read
+     */
+    private int streamReadInt(InputStream stream) throws IOException {
+        byte[] bytes = stream.readNBytes(4);
+        //readNBytes returns fewer bytes at end of stream; decoding anyway would mix in stale buffer contents
+        if(bytes.length < 4){
+            throw new IOException("Unexpected end of stream while reading a 4 byte value");
+        }
+        readerBuff.position(0);
+        readerBuff.put(bytes);
+        this.bytesRead += 4;
+        //absolute read from index 0 using the buffer's little endian order
+        return readerBuff.getInt(0);
+    }
+
+    /**
+     * Reads a little-endian float from the stream
+     * @param stream The stream
+     * @return The float
+     * @throws IOException Thrown if the stream fails to read or ends before 4 bytes could be read
+     */
+    private float streamReadFloat(InputStream stream) throws IOException {
+        byte[] bytes = stream.readNBytes(4);
+        //readNBytes returns fewer bytes at end of stream; decoding anyway would mix in stale buffer contents
+        if(bytes.length < 4){
+            throw new IOException("Unexpected end of stream while reading a 4 byte value");
+        }
+        readerBuff.position(0);
+        readerBuff.put(bytes);
+        this.bytesRead += 4;
+        //absolute read from index 0 using the buffer's little endian order
+        return readerBuff.getFloat(0);
+    }
+
+    /**
+     * Reads a little-endian long from the stream
+     * @param stream The stream
+     * @return The long
+     * @throws IOException Thrown if the stream fails to read or ends before 8 bytes could be read
+     */
+    private long streamReadLong(InputStream stream) throws IOException {
+        byte[] bytes = stream.readNBytes(8);
+        //readNBytes returns fewer bytes at end of stream; decoding anyway would mix in stale buffer contents
+        if(bytes.length < 8){
+            throw new IOException("Unexpected end of stream while reading an 8 byte value");
+        }
+        readerBuff.position(0);
+        readerBuff.put(bytes);
+        this.bytesRead += 8;
+        //absolute read from index 0 using the buffer's little endian order
+        return readerBuff.getLong(0);
+    }
+
+    /**
+     * Reads a little-endian double from the stream
+     * @param stream The stream
+     * @return The double
+     * @throws IOException Thrown if the stream fails to read or ends before 8 bytes could be read
+     */
+    private double streamReadDouble(InputStream stream) throws IOException {
+        byte[] bytes = stream.readNBytes(8);
+        //readNBytes returns fewer bytes at end of stream; decoding anyway would mix in stale buffer contents
+        if(bytes.length < 8){
+            throw new IOException("Unexpected end of stream while reading an 8 byte value");
+        }
+        readerBuff.position(0);
+        readerBuff.put(bytes);
+        this.bytesRead += 8;
+        //absolute read from index 0 using the buffer's little endian order
+        return readerBuff.getDouble(0);
+    }
+
+    /**
+     * Reads a GGUF-format string: an 8 byte length followed by that many bytes of UTF-8 text
+     * @param stream The stream
+     * @return The Java string containing the data from the GGUF-format string
+     * @throws IOException Thrown if the stream fails to read, the stored length cannot be held by a java array, or the stream ends before the whole string was read
+     */
+    private String readGGUFString(InputStream stream) throws IOException {
+        long length = this.streamReadLong(stream);
+        //the length is stored as 64 bits; reject values that would be mangled by the cast to int below
+        if(length < 0 || length > Integer.MAX_VALUE){
+            throw new IOException("GGUF string length out of range: " + length);
+        }
+
+        byte[] bytes = stream.readNBytes((int)length);
+        //a short read means the file is truncated; do not return a partial string or miscount the bytes consumed
+        if(bytes.length != length){
+            throw new IOException("Unexpected end of stream while reading a string of " + length + " bytes");
+        }
+        this.bytesRead += length;
+
+        return new String(bytes, StandardCharsets.UTF_8);
+    }
+
+}
--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFMetadataKV.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFMetadataKV.java
@ -0,0 +1,57 @@
+package org.studiorailgun.kobold.gguf;
+
+
+/**
+ * A metadata key-value pair: a string key, the declared type of the value and the decoded value
+ */
+public class GGUFMetadataKV {
+
+    /**
+     * Metadata value types.
+     * The declaration order matches the numeric type codes 0-12 that GGUFLoader
+     * switches on when it decodes a pair.
+     */
+    public enum GGUFMetadataValue {
+        UINT8,
+        INT8,
+        UINT16,
+        INT16,
+        UINT32,
+        INT32,
+        FLOAT32,
+        BOOL,
+        STRING,
+        ARRAY,
+        UINT64,
+        INT64,
+        DOUBLE,
+    }
+
+    /**
+     * The key for the metadata value
+     */
+    protected String key;
+
+    /**
+     * The type of the value (stored as an int on disk)
+     */
+    protected GGUFMetadataValue value_type;
+
+    /**
+     * The actual value. GGUFLoader stores a boxed primitive, a String, or an array here
+     * depending on the value type.
+     */
+    protected Object value;
+
+    /**
+     * Gets the key
+     * @return The key
+     */
+    public String getKey() {
+        return this.key;
+    }
+
+    /**
+     * Gets the declared type of the value
+     * @return The value type
+     */
+    public GGUFMetadataValue getValue_type() {
+        return this.value_type;
+    }
+
+    /**
+     * Gets the decoded value
+     * @return The value
+     */
+    public Object getValue() {
+        return this.value;
+    }
+
+}
--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFModel.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFModel.java
@ -0,0 +1,51 @@
+package org.studiorailgun.kobold.gguf;
+
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * A GGUF format model: the header, the tensor info records and the tensor data
+ */
+public class GGUFModel {
+
+    /**
+     * The header
+     */
+    protected GGUFHeader header;
+
+    /**
+     * The tensor info, one entry per tensor
+     */
+    protected List<GGUFTensorInfo> tensorInfos = new LinkedList<>();
+
+    /**
+     * Padding to the nearest multiple of ALIGNMENT
+     */
+    protected char[] _padding;
+
+    /**
+     * The tensor data
+     */
+    protected List<ByteBuffer> tensorData = new LinkedList<>();
+
+    /**
+     * Gets the header
+     * @return The header
+     */
+    public GGUFHeader getHeader() {
+        return this.header;
+    }
+
+    /**
+     * Gets the tensor infos (the live list, not a copy)
+     * @return The tensor infos
+     */
+    public List<GGUFTensorInfo> getTensorInfos() {
+        return this.tensorInfos;
+    }
+
+    /**
+     * Gets the padding
+     * @return The padding
+     */
+    public char[] get_padding() {
+        return this._padding;
+    }
+
+    /**
+     * Gets the tensor data (the live list, not a copy)
+     * @return The tensor data
+     */
+    public List<ByteBuffer> getTensorData() {
+        return this.tensorData;
+    }
+
+}
--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFString.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFString.java
@ -0,0 +1,18 @@
+package org.studiorailgun.kobold.gguf;
+
+/**
+ * A gguf format string: a length paired with the string contents.
+ * NOTE(review): GGUFLoader.readGGUFString returns a plain java String and does not
+ * create instances of this class -- confirm whether it is still needed.
+ */
+public class GGUFString {
+    
+    /**
+     * The length of the string
+     */
+    protected long len;
+
+    /**
+     * The string itself
+     */
+    protected String string;
+
+}
--- a/src/main/java/org/studiorailgun/kobold/gguf/GGUFTensorInfo.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFTensorInfo.java
@ -0,0 +1,378 @@
+package org.studiorailgun.kobold.gguf;
+
+/**
+ * Info on a tensor in the model
+ */
+public class GGUFTensorInfo {
+
+    /**
+     * GGML types.
+     * Use {@code getType(int)} to translate a stored type code into one of these
+     * constants: the constant ordinals are not the stored codes, because codes 4 and 5
+     * are deprecated and have no constant here.
+     */
+    public enum GGMLType {
+        //floating point
+        GGML_TYPE_F32,
+        GGML_TYPE_F16,
+        //quantized
+        GGML_TYPE_Q4_0,
+        GGML_TYPE_Q4_1,
+        GGML_TYPE_Q5_0,
+        GGML_TYPE_Q5_1,
+        GGML_TYPE_Q8_0,
+        GGML_TYPE_Q8_1,
+        GGML_TYPE_Q2_K,
+        GGML_TYPE_Q3_K,
+        GGML_TYPE_Q4_K,
+        GGML_TYPE_Q5_K,
+        GGML_TYPE_Q6_K,
+        GGML_TYPE_Q8_K,
+        GGML_TYPE_IQ2_XXS,
+        GGML_TYPE_IQ2_XS,
+        GGML_TYPE_IQ3_XXS,
+        GGML_TYPE_IQ1_S,
+        GGML_TYPE_IQ4_NL,
+        GGML_TYPE_IQ3_S,
+        GGML_TYPE_IQ2_S,
+        GGML_TYPE_IQ4_XS,
+        //integers and double
+        GGML_TYPE_I8,
+        GGML_TYPE_I16,
+        GGML_TYPE_I32,
+        GGML_TYPE_I64,
+        GGML_TYPE_F64,
+        GGML_TYPE_IQ1_M,
+        //returned by getType for any code it does not recognise
+        GGML_TYPE_COUNT,
+    }
+    
+    /**
+     * The name of the tensor
+     */
+    protected String name;
+
+    /**
+     * The number of dimensions of the tensor
+     */
+    protected int nDimensions;
+
+    /**
+     * The extent of each dimension
+     */
+    protected long[] dimensions;
+
+    /**
+     * The type of the tensor's elements (stored as an int on disk)
+     */
+    protected GGMLType type;
+
+    /**
+     * The offset of the tensor's data
+     */
+    protected long offset;
+
+    /**
+     * Gets the name of the tensor
+     * @return The name
+     */
+    public String getName() {
+        return this.name;
+    }
+
+    /**
+     * Gets the number of dimensions
+     * @return The number of dimensions
+     */
+    public int getnDimensions() {
+        return this.nDimensions;
+    }
+
+    /**
+     * Gets the extents of the dimensions (the live array, not a copy)
+     * @return The dimensions
+     */
+    public long[] getDimensions() {
+        return this.dimensions;
+    }
+
+    /**
+     * Gets the element type
+     * @return The type
+     */
+    public GGMLType getType() {
+        return this.type;
+    }
+
+    /**
+     * Gets the offset of the tensor's data
+     * @return The offset
+     */
+    public long getOffset() {
+        return this.offset;
+    }
+
+
+
+
+    /**
+     * Gets the GGML enum type from an int representation
+     * @param enumVal The value represented as an int
+     * @return The actual enum value
+     */
+    protected static GGMLType getType(int enumVal){
+        switch(enumVal){
+            case 0: {
+                return GGMLType.GGML_TYPE_F32;
+            }
+            case 1: {
+                return GGMLType.GGML_TYPE_F16;
+            }
+            case 2: {
+                return GGMLType.GGML_TYPE_Q4_0;
+            }
+            case 3: {
+                return GGMLType.GGML_TYPE_Q4_1;
+            }
+            case 4: {
+                throw new Error("Tensor info type of deprecated type (GGML_TYPE_Q4_2)");
+            }
+            case 5: {
+                throw new Error("Tensor info type of deprecated type (GGML_TYPE_Q4_3)");
+            }
+            case 6: {
+                return GGMLType.GGML_TYPE_Q5_0;
+            }
+            case 7: {
+                return GGMLType.GGML_TYPE_Q5_1;
+            }
+            case 8: {
+                return GGMLType.GGML_TYPE_Q8_0;
+            }
+            case 9: {
+                return GGMLType.GGML_TYPE_Q8_1;
+            }
+            case 10: {
+                return GGMLType.GGML_TYPE_Q2_K;
+            }
+            case 11: {
+                return GGMLType.GGML_TYPE_Q3_K;
+            }
+            case 12: {
+                return GGMLType.GGML_TYPE_Q4_K;
+            }
+            case 13: {
+                return GGMLType.GGML_TYPE_Q5_K;
+            }
+            case 14: {
+                return GGMLType.GGML_TYPE_Q6_K;
+            }
+            case 15: {
+                return GGMLType.GGML_TYPE_Q8_K;
+            }
+            case 16: {
+                return GGMLType.GGML_TYPE_IQ2_XXS;
+            }
+            case 17: {
+                return GGMLType.GGML_TYPE_IQ2_XS;
+            }
+            case 18: {
+                return GGMLType.GGML_TYPE_IQ3_XXS;
+            }
+            case 19: {
+                return GGMLType.GGML_TYPE_IQ1_S;
+            }
+            case 20: {
+                return GGMLType.GGML_TYPE_IQ4_NL;
+            }
+            case 21: {
+                return GGMLType.GGML_TYPE_IQ3_S;
+            }
+            case 22: {
+                return GGMLType.GGML_TYPE_IQ2_S;
+            }
+            case 23: {
+                return GGMLType.GGML_TYPE_IQ4_XS;
+            }
+            case 24: {
+                return GGMLType.GGML_TYPE_I8;
+            }
+            case 25: {
+                return GGMLType.GGML_TYPE_I16;
+            }
+            case 26: {
+                return GGMLType.GGML_TYPE_I32;
+            }
+            case 27: {
+                return GGMLType.GGML_TYPE_I64;
+            }
+            case 28: {
+                return GGMLType.GGML_TYPE_F64;
+            }
+            case 29: {
+                return GGMLType.GGML_TYPE_IQ1_M;
+            }
+            default: {
+                return GGMLType.GGML_TYPE_COUNT;
+            }
+        }
+    }
+
+    /**
+     * Gets the size in bits of the GGML type
+     * @param type The type
+     * @return The number of bits that type takes
+     */
+    protected static float getUnitSize(GGMLType type){
+        //reference: https://github.com/ggerganov/llama.cpp/wiki/Tensor-Encoding-Schemes
+        switch(type){
+            case GGML_TYPE_F32: {
+                return 32;
+            }
+            case GGML_TYPE_F16: {
+                return 16;
+            }
+            case GGML_TYPE_Q4_1:
+            case GGML_TYPE_Q4_0: {
+                return 4;
+            }
+            case GGML_TYPE_Q5_0:
+            case GGML_TYPE_Q5_1: {
+                return 5;
+            }
+            case GGML_TYPE_Q8_0:
+            case GGML_TYPE_Q8_1: {
+                return 8;
+            }
+            case GGML_TYPE_Q2_K: {
+                return 2.5625f;
+            }
+            case GGML_TYPE_Q3_K: {
+                return 3.4375f;
+            }
+            case GGML_TYPE_Q4_K: {
+                return 4.5f;
+            }
+            case GGML_TYPE_Q5_K: {
+                return 5.5f;
+            }
+            case GGML_TYPE_Q6_K: {
+                return 6.5625f;
+            }
+            case GGML_TYPE_Q8_K: {
+                return 8;
+            }
+            case GGML_TYPE_IQ2_XXS: {
+                return 2.0625f;
+            }
+            case GGML_TYPE_IQ2_XS: {
+                return 2.31f;
+            }
+            case GGML_TYPE_IQ3_XXS: {
+                return 3.0625f;
+            }
+            case GGML_TYPE_IQ1_S: {
+                return 1.5f;
+            }
+            case GGML_TYPE_IQ4_NL: {
+                return 4.5f;
+            }
+            case GGML_TYPE_IQ3_S: {
+                return 3.4375f;
+            }
+            case GGML_TYPE_IQ2_S: {
+                return 2.5f;
+            }
+            case GGML_TYPE_IQ4_XS: {
+                return 4.25f;
+            }
+            case GGML_TYPE_I8: {
+                return 8;
+            }
+            case GGML_TYPE_I16: {
+                return 16;
+            }
+            case GGML_TYPE_I32: {
+                return 32;
+            }
+            case GGML_TYPE_F64:
+            case GGML_TYPE_I64: {
+                return 64;
+            }
+            case GGML_TYPE_IQ1_M: {
+                return 1.75f;
+            }
+            case GGML_TYPE_COUNT:
+            default: {
+                throw new Error("Undefined type!");
+            }
+        }
+    }
+
+    /**
+     * Gets the block size of a given GGML type in bytes.
+     * For the plain scalar types a block is a single element. For the quantized types a
+     * block is the packed struct ggml stores for one group of weights, so the value is
+     * the byte size of that struct (not the bits-per-weight figure returned by
+     * {@code getUnitSize}).
+     * @param type The type
+     * @return The size of a block in the tensor data of this type in bytes
+     */
+    protected static float getBlockSizeInBytes(GGMLType type){
+        //reference for the block structs: https://github.com/ggerganov/llama.cpp/blob/master/ggml/src/ggml-common.h
+        switch(type){
+
+            //regular types -- one element per block
+            case GGML_TYPE_I8: {
+                return 1;
+            }
+            case GGML_TYPE_F16:
+            case GGML_TYPE_I16: {
+                return 2;
+            }
+            case GGML_TYPE_F32:
+            case GGML_TYPE_I32: {
+                return 4;
+            }
+            case GGML_TYPE_F64:
+            case GGML_TYPE_I64: {
+                return 8;
+            }
+
+            //blocks of 32 weights
+            case GGML_TYPE_Q4_0: {
+                return 18; //2 (d) + 16 (qs)
+            }
+            case GGML_TYPE_Q4_1: {
+                return 20; //2 (d) + 2 (m) + 16 (qs)
+            }
+            case GGML_TYPE_Q5_0: {
+                return 22; //2 (d) + 4 (qh) + 16 (qs)
+            }
+            case GGML_TYPE_Q5_1: {
+                return 24; //2 (d) + 2 (m) + 4 (qh) + 16 (qs)
+            }
+            case GGML_TYPE_Q8_0: {
+                return 34; //2 (d) + 32 (qs)
+            }
+            case GGML_TYPE_Q8_1: {
+                return 36; //2 (d) + 2 (s) + 32 (qs)
+            }
+            case GGML_TYPE_IQ4_NL: {
+                return 18; //2 (d) + 16 (qs)
+            }
+
+            //super blocks of 256 weights
+            case GGML_TYPE_Q2_K: {
+                return 84; //16 (scales) + 64 (qs) + 2 (d) + 2 (dmin)
+            }
+            case GGML_TYPE_Q3_K: {
+                return 110; //32 (hmask) + 64 (qs) + 12 (scales) + 2 (d)
+            }
+            case GGML_TYPE_Q4_K: {
+                return 144; //2 (d) + 2 (dmin) + 12 (scales) + 128 (qs)
+            }
+            case GGML_TYPE_Q5_K: {
+                return 176; //2 (d) + 2 (dmin) + 12 (scales) + 32 (qh) + 128 (qs)
+            }
+            case GGML_TYPE_Q6_K: {
+                return 210; //128 (ql) + 64 (qh) + 16 (scales) + 2 (d)
+            }
+            case GGML_TYPE_Q8_K: {
+                return 292; //4 (d) + 256 (qs) + 32 (bsums)
+            }
+            case GGML_TYPE_IQ2_XXS: {
+                return 66; //2 (d) + 64 (qs)
+            }
+            case GGML_TYPE_IQ2_XS: {
+                return 74; //2 (d) + 64 (qs) + 8 (scales)
+            }
+            case GGML_TYPE_IQ3_XXS: {
+                return 98; //2 (d) + 96 (qs)
+            }
+            case GGML_TYPE_IQ1_S: {
+                return 50; //2 (d) + 32 (qs) + 16 (qh)
+            }
+            case GGML_TYPE_IQ3_S: {
+                return 110; //2 (d) + 64 (qs) + 8 (qh) + 32 (signs) + 4 (scales)
+            }
+            case GGML_TYPE_IQ2_S: {
+                return 82; //2 (d) + 64 (qs) + 8 (qh) + 8 (scales)
+            }
+            case GGML_TYPE_IQ4_XS: {
+                return 136; //2 (d) + 2 (scales_h) + 4 (scales_l) + 128 (qs)
+            }
+            case GGML_TYPE_IQ1_M: {
+                return 56; //32 (qs) + 16 (qh) + 8 (scales)
+            }
+
+            //GGML_TYPE_COUNT is what getType returns for unknown codes
+            case GGML_TYPE_COUNT:
+            default: {
+                throw new Error("Undefined type!");
+            }
+        }
+    }
+
+
+}
--- a/src/main/java/org/studiorailgun/kobold/gguf/quant/Quantization.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/quant/Quantization.java
@ -0,0 +1,245 @@
+package org.studiorailgun.kobold.gguf.quant;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Superblock layouts for the k-quant quantization formats (2-bit through 8-bit)
+ */
+public class Quantization {
+
+    //reference for superblock structs: https://github.com/byroneverson/llm.cpp/blob/master/k_quants.h
+    //also maybe reference: https://github.com/ggerganov/llama.cpp/blob/master/ggml/src/ggml-common.h
+    //tensor encoding scheme wiki page: https://github.com/ggerganov/llama.cpp/wiki/Tensor-Encoding-Schemes
+    //thread explaining quantization scheme: https://github.com/ggerganov/llama.cpp/pull/8151
+    //https://github.com/ggerganov/llama.cpp/blob/75af08c475e285888f66556d0f459c533b7deb95/ggml/src/ggml-impl.h
+
+    /**
+     * Super block size.
+     * The array lengths in the block classes of this file are derived from it.
+     */
+    static final int QK_K = 256;
+
+    /**
+     * Scale size -- the length of the scales array in the 4-bit and 5-bit blocks
+     */
+    static final int K_SCALE_SIZE = 12;
+
+    /**
+     * A buffer used for type conversion (8 bytes; not referenced by the block classes in this file)
+     */
+    static ByteBuffer conversionBuffer = ByteBuffer.allocate(8);
+    
+
+    /**
+     * 2-bit quantization
+     */
+    public static class Block_Q2_K implements Superblock {
+
+        /**
+         * scales and mins, quantized with 4 bits
+         */
+        char[] scales = new char[QK_K/16];
+
+        /**
+         * quants
+         */
+        char[] qs = new char[QK_K/4];
+
+        /**
+         * super block scale for quantized scales
+         */
+        short d;
+
+        /**
+         * super block scale for quantized mins
+         */
+        short dmin;
+        
+        /**
+         * Gets the size of the superblock in bytes.
+         * Array elements are counted as one byte each and the two shorts as two bytes
+         * each, the same way Block_Q6_K sums its fields.
+         * @return The size of the superblock
+         */
+        @Override
+        public int getSize(){
+            //scales + qs + d + dmin
+            return QK_K/16 + QK_K/4 + 2 + 2;
+        }
+
+    }
+
+    /**
+     * 3-bit quantization
+     */
+    public static class Block_Q3_K implements Superblock {
+
+        /**
+         * quants - high bit
+         */
+        char[] hmask = new char[QK_K/8];
+
+        /**
+         * quants - low 2 bits
+         */
+        char[] qs = new char[QK_K/4];
+
+        /**
+         * scales, quantized with 6 bits
+         */
+        char[] scales = new char[12];
+
+        /**
+         * super block scale
+         */
+        short dmin;
+
+        /**
+         * Gets the size of the superblock in bytes.
+         * Array elements are counted as one byte each and the short as two bytes,
+         * the same way Block_Q6_K sums its fields.
+         * @return The size of the superblock
+         */
+        @Override
+        public int getSize(){
+            //hmask + qs + scales + super block scale
+            return QK_K/8 + QK_K/4 + 12 + 2;
+        }
+
+    }
+
+    /**
+     * 4-bit quantization
+     */
+    public static class Block_Q4_K implements Superblock {
+
+        /**
+         * Super block scale for quantized scales
+         */
+        short d;
+
+        /**
+         * Super block scale for quantized mins
+         */
+        short dmin;
+
+        /**
+         * Scales and mins, quantized with 6 bits
+         */
+        char[] scales = new char[K_SCALE_SIZE];
+
+        /**
+         * 4-bit quants
+         */
+        char[] qs = new char[QK_K/2];
+
+        /**
+         * Gets the size of the superblock in bytes.
+         * Array elements are counted as one byte each and the two shorts as two bytes
+         * each, the same way Block_Q6_K sums its fields.
+         * @return The size of the superblock
+         */
+        @Override
+        public int getSize(){
+            //d + dmin + scales + qs
+            return 2 + 2 + K_SCALE_SIZE + QK_K/2;
+        }
+
+    }
+
+    /**
+     * 5-bit quantization
+     */
+    public static class Block_Q5_K implements Superblock {
+
+        /**
+         * Super block scale for quantized scales
+         */
+        short d;
+
+        /**
+         * Super block scale for quantized mins
+         */
+        short dmin;
+
+        /**
+         * Scales and mins, quantized with 6 bits
+         */
+        char[] scales = new char[K_SCALE_SIZE];
+
+        /**
+         * quants, high bit
+         */
+        char[] qh = new char[QK_K/8];
+
+        /**
+         * quants, low 4 bits
+         */
+        char[] qs = new char[QK_K/2];
+
+        /**
+         * Gets the size of the superblock in bytes.
+         * Array elements are counted as one byte each and the two shorts as two bytes
+         * each, the same way Block_Q6_K sums its fields.
+         * @return The size of the superblock
+         */
+        @Override
+        public int getSize(){
+            //d + dmin + scales + qh + qs
+            return 2 + 2 + K_SCALE_SIZE + QK_K/8 + QK_K/2;
+        }
+
+    }
+
+    /**
+     * 6-bit quantization
+     */
+    public static class Block_Q6_K implements Superblock {
+
+        /**
+         * Quants, lower 4 bits
+         */
+        char[] ql = new char[QK_K/2];
+
+        /**
+         * Quants, upper 2 bits
+         */
+        char[] qh = new char[QK_K/4];
+
+        /**
+         * scales, quantized with 8 bits
+         */
+        char[] scales = new char[QK_K/16];
+
+        /**
+         * super block scale
+         */
+        short d;
+
+        /**
+         * Gets the size of the superblock: one unit per element of the three
+         * arrays plus two for the super block scale.
+         * @return The size of the superblock
+         */
+        @Override
+        public int getSize(){
+            final int lowQuants = QK_K/2;
+            final int highQuants = QK_K/4;
+            final int scaleCount = QK_K/16;
+            final int superScale = 2;
+            return lowQuants + highQuants + scaleCount + superScale;
+        }
+
+    }
+
+    /**
+     * 8-bit quantization
+     */
+    public static class Block_Q8_K implements Superblock {
+
+        /**
+         * Delta
+         */
+        float d;
+
+        /**
+         * quants
+         */
+        char[] qs = new char[QK_K];
+
+        /**
+         * sum of the quants in groups of 16
+         */
+        short[] bsums = new short[QK_K/16];
+
+        /**
+         * Gets the size of the superblock in bytes.
+         * The float counts as four bytes, each quant as one byte and each short sum
+         * as two bytes.
+         * @return The size of the superblock
+         */
+        @Override
+        public int getSize(){
+            //d + qs + bsums
+            return 4 + QK_K + 2 * (QK_K/16);
+        }
+
+    }
+
+
+    
+
+}
--- a/src/main/java/org/studiorailgun/kobold/gguf/quant/Superblock.java
+++ b/src/main/java/org/studiorailgun/kobold/gguf/quant/Superblock.java
@ -0,0 +1,14 @@
+package org.studiorailgun.kobold.gguf.quant;
+
+/**
+ * A superblock of quantized weights
+ */
+public interface Superblock {
+
+    /**
+     * Gets the size of the superblock
+     * @return The size of the superblock
+     */
+    int getSize();
+
+}
				`@ -0,0 +1 @@`
				`[1736213723] warming up the model with an empty run`