diff --git a/llama.log b/llama.log
new file mode 100644
index 0000000..cc618d8
--- /dev/null
+++ b/llama.log
@@ -0,0 +1 @@
+[1736213723] warming up the model with an empty run
diff --git a/src/main/java/org/studiorailgun/Main.java b/src/main/java/org/studiorailgun/Main.java
index 27183a6..f03b597 100644
--- a/src/main/java/org/studiorailgun/Main.java
+++ b/src/main/java/org/studiorailgun/Main.java
@@ -1,6 +1,16 @@
 package org.studiorailgun;
 
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+
 import org.studiorailgun.interact.GameLoop;
+import org.studiorailgun.kobold.gguf.GGUFHeader;
+import org.studiorailgun.kobold.gguf.GGUFLoader;
+import org.studiorailgun.kobold.gguf.GGUFMetadataKV;
+import org.studiorailgun.kobold.gguf.GGUFModel;
+import org.studiorailgun.kobold.gguf.GGUFTensorInfo;
 
 /**
  * The main class
@@ -11,6 +21,25 @@ public class Main {
      * The main method
      */
     public static void main(String[] args){
+        try {
+            GGUFLoader loader = new GGUFLoader();
+            GGUFModel model = loader.load("C:\\Users\\satellite\\Documents\\ai\\koboldcpp\\Fimbulvetr-Kuro-Lotus-10.7B-Q6_K.gguf");
+            System.out.println("Metadata: ");
+            GGUFHeader header = model.getHeader();
+            for(GGUFMetadataKV pair : header.getMetadataPairs()){
+                System.out.println(pair.getKey() + " - " + pair.getValue_type() + " - " + pair.getValue());
+            }
+
+            System.out.println("\n");
+
+            for(GGUFTensorInfo tensorInfo : model.getTensorInfos()){
+                System.out.println(tensorInfo.getName() + " - " + tensorInfo.getType() + " - " + tensorInfo.getOffset());
+            }
+
+            System.exit(0);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
         GameLoop.main();
     }
 
diff --git a/src/main/java/org/studiorailgun/kobold/gguf/GGUFHeader.java b/src/main/java/org/studiorailgun/kobold/gguf/GGUFHeader.java
new file mode 100644
index 0000000..39931e6
--- /dev/null
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFHeader.java
@@ -0,0 +1,61 @@
+package org.studiorailgun.kobold.gguf;
+
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * The header of a gguf file
+ */
+public class GGUFHeader {
+
+    /**
+     * The magic number, read from the first four bytes of the file
+     */
+    protected int magic;
+
+    /**
+     * The version field, read directly after the magic number
+     */
+    protected int version;
+
+    /**
+     * The number of tensor infos stored in the file
+     */
+    protected long tensorCount;
+
+    /**
+     * The number of metadata key-value pairs stored in the file
+     */
+    protected long metadataKVCount;
+
+    /**
+     * The metadata key-value pairs, in the order they were read
+     */
+    protected List<GGUFMetadataKV> metadataPairs = new LinkedList<GGUFMetadataKV>();
+
+    /** Gets the magic number */
+    public int getMagic() {
+        return magic;
+    }
+
+    /** Gets the version field */
+    public int getVersion() {
+        return version;
+    }
+
+    /** Gets the number of tensor infos */
+    public long getTensorCount() {
+        return tensorCount;
+    }
+
+    /** Gets the number of metadata key-value pairs */
+    public long getMetadataKVCount() {
+        return metadataKVCount;
+    }
+
+    /** Gets the metadata key-value pairs */
+    public List<GGUFMetadataKV> getMetadataPairs() {
+        return metadataPairs;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/kobold/gguf/GGUFLoader.java b/src/main/java/org/studiorailgun/kobold/gguf/GGUFLoader.java
new file mode 100644
index 0000000..d8256f2
--- /dev/null
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFLoader.java
@@ -0,0 +1,487 @@
+package org.studiorailgun.kobold.gguf;
+
+import java.io.BufferedInputStream;
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+
+import org.studiorailgun.kobold.gguf.GGUFMetadataKV.GGUFMetadataValue;
+
+/**
+ * Loads a gguf format model
+ */
+public class GGUFLoader {
+
+    //gguf core explanation: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md
+
+    /**
+     * The default alignment for the file, in bytes
+     */
+    static final int DEFAULT_ALIGNMENT = 32;
+
+    /**
+     * The bytes "GGUF" decoded as a little-endian int
+     */
+    static final int GGUF_MAGIC = 0x46554747;
+
+    /**
+     * The metadata key that overrides the default alignment
+     */
+    static final String ALIGNMENT_KEY = "general.alignment";
+
+    /**
+     * Scratch buffer used to decode little-endian values read from the stream
+     */
+    ByteBuffer readerBuff = ByteBuffer.allocate(8);
+
+    /**
+     * The alignment of the file, in bytes
+     */
+    int alignment = DEFAULT_ALIGNMENT;
+
+    /**
+     * The number of bytes read -- used for keeping track of alignment
+     */
+    long bytesRead = 0;
+
+    /**
+     * The value of bytesRead at the first byte of the tensor data section
+     */
+    long tensorDataStart = 0;
+
+    /**
+     * Constructor
+     */
+    public GGUFLoader(){
+        readerBuff.order(ByteOrder.LITTLE_ENDIAN);
+    }
+
+    /**
+     * Loads the gguf model at the specified path
+     * @param path The path
+     * @return The model -- its tensor data entries are null because tensor payloads are not read yet
+     * @throws IOException Thrown if the file cannot be read, is truncated, or is not a gguf file
+     */
+    public GGUFModel load(String path) throws IOException {
+        //reset the per-file state so that one loader can load several files
+        this.bytesRead = 0;
+        this.tensorDataStart = 0;
+        this.alignment = DEFAULT_ALIGNMENT;
+
+        //try-with-resources closes the stream even when parsing throws
+        try(InputStream stream = new BufferedInputStream(Files.newInputStream(new File(path).toPath()))){
+            GGUFModel rVal = new GGUFModel();
+
+            //read the header
+            rVal.header = this.readHeader(stream);
+            this.alignment = this.resolveAlignment(rVal.header);
+
+            //read the tensor info
+            for(long i = 0; i < rVal.header.tensorCount; i++){
+                rVal.tensorInfos.add(this.readTensorInfo(stream));
+            }
+
+            if(!rVal.tensorInfos.isEmpty()){
+                //read padding -- the tensor data starts on the next multiple of the alignment, which is a byte count
+                long padding = (this.alignment - (this.bytesRead % this.alignment)) % this.alignment;
+                this.skipFully(stream, padding);
+                this.tensorDataStart = this.bytesRead;
+            }
+
+            //read the tensor data
+            for(GGUFTensorInfo tensorInfo : rVal.tensorInfos){
+                rVal.tensorData.add(this.readTensorData(stream,tensorInfo));
+            }
+
+            return rVal;
+        }
+    }
+
+    /**
+     * Reads a GGUF header from an input stream
+     * @param stream The stream
+     * @return The header
+     * @throws IOException Thrown if the stream fails to read or does not start with the gguf magic number
+     */
+    private GGUFHeader readHeader(InputStream stream) throws IOException {
+        GGUFHeader rVal = new GGUFHeader();
+        rVal.magic = this.streamReadInt(stream);
+        if(rVal.magic != GGUF_MAGIC){
+            throw new IOException("Not a gguf file -- magic number was 0x" + Integer.toHexString(rVal.magic));
+        }
+        rVal.version = this.streamReadInt(stream);
+        rVal.tensorCount = this.streamReadLong(stream);
+        rVal.metadataKVCount = this.streamReadLong(stream);
+
+        System.out.println(rVal.magic + " " + rVal.version + " " + rVal.tensorCount + " " + rVal.metadataKVCount);
+
+        //read in the metadata kv pairs
+        for(long i = 0; i < rVal.metadataKVCount; i++){
+            rVal.metadataPairs.add(this.readMetadataPair(stream));
+        }
+
+        return rVal;
+    }
+
+    /**
+     * Finds the alignment declared by the metadata
+     * @param header The header whose metadata pairs are searched
+     * @return The positive int stored under general.alignment, or the default alignment if there is none
+     */
+    private int resolveAlignment(GGUFHeader header){
+        for(GGUFMetadataKV pair : header.metadataPairs){
+            if(ALIGNMENT_KEY.equals(pair.key) && pair.value instanceof Integer && (Integer)pair.value > 0){
+                return (Integer)pair.value;
+            }
+        }
+        return DEFAULT_ALIGNMENT;
+    }
+
+    /**
+     * Reads in a GGUF metadata pair
+     * @param stream The stream
+     * @return The pair
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private GGUFMetadataKV readMetadataPair(InputStream stream) throws IOException {
+        GGUFMetadataKV pair = new GGUFMetadataKV();
+
+        pair.key = this.readGGUFString(stream);
+
+        int typeRaw = this.streamReadInt(stream);
+        switch(typeRaw){
+            case 0: {
+                pair.value_type = GGUFMetadataValue.UINT8;
+                pair.value = this.streamReadChar(stream);
+            } break;
+            case 1: {
+                pair.value_type = GGUFMetadataValue.INT8;
+                pair.value = this.streamReadChar(stream);
+            } break;
+            case 2: {
+                pair.value_type = GGUFMetadataValue.UINT16;
+                pair.value = this.streamReadShort(stream);
+            } break;
+            case 3: {
+                pair.value_type = GGUFMetadataValue.INT16;
+                pair.value = this.streamReadShort(stream);
+            } break;
+            case 4: {
+                pair.value_type = GGUFMetadataValue.UINT32;
+                pair.value = this.streamReadInt(stream);
+            } break;
+            case 5: {
+                pair.value_type = GGUFMetadataValue.INT32;
+                pair.value = this.streamReadInt(stream);
+            } break;
+            case 6: {
+                pair.value_type = GGUFMetadataValue.FLOAT32;
+                pair.value = this.streamReadFloat(stream);
+            } break;
+            case 7: {
+                pair.value_type = GGUFMetadataValue.BOOL;
+                pair.value = this.streamReadChar(stream);
+            } break;
+            case 8: {
+                pair.value_type = GGUFMetadataValue.STRING;
+                pair.value = this.readGGUFString(stream);
+            } break;
+            case 9: {
+                pair.value_type = GGUFMetadataValue.ARRAY;
+                pair.value = this.readMetadataArray(stream);
+            } break;
+            case 10: {
+                pair.value_type = GGUFMetadataValue.UINT64;
+                pair.value = this.streamReadLong(stream);
+            } break;
+            case 11: {
+                pair.value_type = GGUFMetadataValue.INT64;
+                pair.value = this.streamReadLong(stream);
+            } break;
+            case 12: {
+                pair.value_type = GGUFMetadataValue.DOUBLE;
+                pair.value = this.streamReadDouble(stream);
+            } break;
+            default: {
+                throw new Error("Invalid metadata pair data type! " + typeRaw);
+            }
+        }
+
+        return pair;
+    }
+
+    /**
+     * Reads a metadata array value
+     * @param stream The stream
+     * @return The array value
+     * @throws IOException Thrown if the stream has an io exception
+     */
+    private Object readMetadataArray(InputStream stream) throws IOException {
+        int typeRaw = this.streamReadInt(stream);
+        int len = this.checkedLength(this.streamReadLong(stream), "metadata array");
+        switch(typeRaw){
+            case 0:
+            case 1: {
+                char[] rVal = new char[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadChar(stream);
+                }
+                return rVal;
+            }
+            case 2:
+            case 3: {
+                short[] rVal = new short[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadShort(stream);
+                }
+                return rVal;
+            }
+            case 4:
+            case 5: {
+                int[] rVal = new int[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadInt(stream);
+                }
+                return rVal;
+            }
+            case 6: {
+                float[] rVal = new float[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadFloat(stream);
+                }
+                return rVal;
+            }
+            case 7: {
+                boolean[] rVal = new boolean[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadChar(stream) > 0;
+                }
+                return rVal;
+            }
+            case 8: {
+                String[] rVal = new String[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.readGGUFString(stream);
+                }
+                return rVal;
+            }
+            case 9: {
+                Object[] rVal = new Object[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.readMetadataArray(stream);
+                }
+                return rVal;
+            }
+            case 10:
+            case 11: {
+                long[] rVal = new long[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadLong(stream);
+                }
+                return rVal;
+            }
+            case 12: {
+                double[] rVal = new double[len];
+                for(int i = 0; i < len; i++){
+                    rVal[i] = this.streamReadDouble(stream);
+                }
+                return rVal;
+            }
+            default: {
+                throw new Error("Failed to read metadata array -- invalid type! " + typeRaw);
+            }
+        }
+    }
+
+    /**
+     * Reads info on a tensor from the file
+     * @param stream The stream
+     * @return The tensor info
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private GGUFTensorInfo readTensorInfo(InputStream stream) throws IOException {
+        GGUFTensorInfo rVal = new GGUFTensorInfo();
+
+        rVal.name = this.readGGUFString(stream);
+
+        rVal.nDimensions = this.checkedLength(this.streamReadInt(stream), "tensor dimension list");
+
+        rVal.dimensions = new long[rVal.nDimensions];
+        for(int i = 0; i < rVal.nDimensions; i++){
+            rVal.dimensions[i] = this.streamReadLong(stream);
+        }
+
+        rVal.type = GGUFTensorInfo.getType(this.streamReadInt(stream));
+
+        rVal.offset = this.streamReadLong(stream);
+
+        return rVal;
+    }
+
+    /**
+     * Advances the stream to the start of a tensor's data
+     * <p>
+     * The payload itself is not read yet, so the returned buffer is always null
+     * @param stream The stream
+     * @param tensorInfo The tensor metadata
+     * @return The binary tensor data (currently always null)
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private ByteBuffer readTensorData(InputStream stream, GGUFTensorInfo tensorInfo) throws IOException {
+        //tensor offsets are relative to the start of the tensor data section, not to the start of the file
+        long target = this.tensorDataStart + tensorInfo.getOffset();
+        if(target > this.bytesRead){
+            this.skipFully(stream, target - this.bytesRead);
+        }
+
+        float bitsPerWeight = GGUFTensorInfo.getUnitSize(tensorInfo.getType());
+        System.out.println("Bits per weight: " + bitsPerWeight);
+
+        return null;
+    }
+
+    /**
+     * Skips bytes in the stream, keeping bytesRead in sync
+     * @param stream The stream
+     * @param count The number of bytes to skip
+     * @throws IOException Thrown if the stream ends before count bytes were skipped
+     */
+    private void skipFully(InputStream stream, long count) throws IOException {
+        long remaining = count;
+        while(remaining > 0){
+            long skipped = stream.skip(remaining);
+            if(skipped <= 0){
+                //skip is allowed to return 0 without being at the end, a read tells the two cases apart
+                if(stream.read() < 0){
+                    throw new EOFException("Unexpected end of gguf file");
+                }
+                skipped = 1;
+            }
+            remaining -= skipped;
+            this.bytesRead += skipped;
+        }
+    }
+
+    /**
+     * Reads exactly count bytes into the start of readerBuff
+     * @param stream The stream
+     * @param count The number of bytes to read, at most the capacity of readerBuff
+     * @return readerBuff, positioned at the first byte that was read
+     * @throws IOException Thrown if the stream fails to read or ends early
+     */
+    private ByteBuffer fill(InputStream stream, int count) throws IOException {
+        byte[] bytes = this.readExact(stream, count);
+        readerBuff.clear();
+        readerBuff.put(bytes);
+        readerBuff.flip();
+        return readerBuff;
+    }
+
+    /**
+     * Reads exactly count bytes from the stream, keeping bytesRead in sync
+     * @param stream The stream
+     * @param count The number of bytes to read
+     * @return The bytes
+     * @throws IOException Thrown if the stream fails to read or ends early
+     */
+    private byte[] readExact(InputStream stream, int count) throws IOException {
+        byte[] bytes = stream.readNBytes(count);
+        if(bytes.length < count){
+            throw new EOFException("Unexpected end of gguf file -- wanted " + count + " bytes but got " + bytes.length);
+        }
+        this.bytesRead += count;
+        return bytes;
+    }
+
+    /**
+     * Converts a length read from the file to an int
+     * @param raw The length as stored in the file
+     * @param what Names the thing being measured, for the error message
+     * @return The length
+     * @throws IOException Thrown if the length is negative or does not fit in an int
+     */
+    private int checkedLength(long raw, String what) throws IOException {
+        if(raw < 0 || raw > Integer.MAX_VALUE){
+            throw new IOException("Invalid " + what + " length: " + raw);
+        }
+        return (int)raw;
+    }
+
+    /**
+     * Reads a single unsigned byte from the stream, widened to a char
+     * @param stream The stream
+     * @return The char
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private char streamReadChar(InputStream stream) throws IOException {
+        //only one byte is on disk -- decoding a two byte java char would pick up a stale byte from the previous read
+        return (char)(this.fill(stream, 1).get() & 0xFF);
+    }
+
+    /**
+     * Reads a little-endian short from the stream
+     * @param stream The stream
+     * @return The short
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private short streamReadShort(InputStream stream) throws IOException {
+        return this.fill(stream, 2).getShort();
+    }
+
+    /**
+     * Reads a little-endian int from the stream
+     * @param stream The stream
+     * @return The int
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private int streamReadInt(InputStream stream) throws IOException {
+        return this.fill(stream, 4).getInt();
+    }
+
+    /**
+     * Reads a little-endian float from the stream
+     * @param stream The stream
+     * @return The float
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private float streamReadFloat(InputStream stream) throws IOException {
+        return this.fill(stream, 4).getFloat();
+    }
+
+    /**
+     * Reads a little-endian long from the stream
+     * @param stream The stream
+     * @return The long
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private long streamReadLong(InputStream stream) throws IOException {
+        return this.fill(stream, 8).getLong();
+    }
+
+    /**
+     * Reads a little-endian double from the stream
+     * @param stream The stream
+     * @return The double
+     * @throws IOException Thrown if the stream fails to read
+     */
+    private double streamReadDouble(InputStream stream) throws IOException {
+        return this.fill(stream, 8).getDouble();
+    }
+
+    /**
+     * Reads a GGUF-format string
+     * @param stream The stream
+     * @return The Java string containing the data from the GGUF-format string
+     * @throws IOException Thrown if the stream fails to read or the stored length is invalid
+     */
+    private String readGGUFString(InputStream stream) throws IOException {
+        int length = this.checkedLength(this.streamReadLong(stream), "string");
+        return new String(this.readExact(stream, length), StandardCharsets.UTF_8);
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/kobold/gguf/GGUFMetadataKV.java b/src/main/java/org/studiorailgun/kobold/gguf/GGUFMetadataKV.java
new file mode 100644
index 0000000..7f9dbac
--- /dev/null
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFMetadataKV.java
@@ -0,0 +1,57 @@
+package org.studiorailgun.kobold.gguf;
+
+/**
+ * A metadata key-value pair
+ */
+public class GGUFMetadataKV {
+
+    /**
+     * Metadata value types, declared in the order of the type ids the loader maps them from (0 = UINT8 ... 12 = DOUBLE)
+     */
+    public enum GGUFMetadataValue {
+        UINT8,
+        INT8,
+        UINT16,
+        INT16,
+        UINT32,
+        INT32,
+        FLOAT32,
+        BOOL,
+        STRING,
+        ARRAY,
+        UINT64,
+        INT64,
+        DOUBLE,
+    }
+
+    /**
+     * The key for the metadata value
+     */
+    protected String key;
+
+    /**
+     * The type of the value
+     */
+    protected GGUFMetadataValue value_type; //stored as an int on disk
+
+    /**
+     * The actual value -- a boxed primitive, a String or an array, depending on the type
+     */
+    protected Object value;
+
+    /** Gets the key */
+    public String getKey() {
+        return key;
+    }
+
+    /** Gets the type of the value */
+    public GGUFMetadataValue getValue_type() {
+        return value_type;
+    }
+
+    /** Gets the value */
+    public Object getValue() {
+        return value;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/kobold/gguf/GGUFModel.java b/src/main/java/org/studiorailgun/kobold/gguf/GGUFModel.java
new file mode 100644
index 0000000..96f146d
--- /dev/null
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFModel.java
@@ -0,0 +1,52 @@
+package org.studiorailgun.kobold.gguf;
+
+import java.nio.ByteBuffer;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * A GGUF format model
+ */
+public class GGUFModel {
+
+    /**
+     * The header
+     */
+    protected GGUFHeader header;
+
+    /**
+     * The tensor info
+     */
+    protected List<GGUFTensorInfo> tensorInfos = new LinkedList<GGUFTensorInfo>();
+
+    /**
+     * Padding to the nearest multiple of ALIGNMENT
+     */
+    protected char _padding[];
+
+    /**
+     * The tensor data, one entry per tensor info
+     */
+    protected List<ByteBuffer> tensorData = new LinkedList<ByteBuffer>();
+
+    /** Gets the header */
+    public GGUFHeader getHeader() {
+        return header;
+    }
+
+    /** Gets the tensor infos */
+    public List<GGUFTensorInfo> getTensorInfos() {
+        return tensorInfos;
+    }
+
+    /** Gets the padding */
+    public char[] get_padding() {
+        return _padding;
+    }
+
+    /** Gets the tensor data */
+    public List<ByteBuffer> getTensorData() {
+        return tensorData;
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/kobold/gguf/GGUFString.java b/src/main/java/org/studiorailgun/kobold/gguf/GGUFString.java
new file mode 100644
index 0000000..495d6e0
--- /dev/null
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFString.java
@@ -0,0 +1,18 @@
+package org.studiorailgun.kobold.gguf;
+
+/**
+ * A gguf format string
+ */
+public class GGUFString {
+
+    /**
+     * The length of the string
+     */
+    protected long len;
+
+    /**
+     * The string itself
+     */
+    protected String string;
+
+}
diff --git a/src/main/java/org/studiorailgun/kobold/gguf/GGUFTensorInfo.java b/src/main/java/org/studiorailgun/kobold/gguf/GGUFTensorInfo.java
new file mode 100644
index 0000000..6103899
--- /dev/null
+++ b/src/main/java/org/studiorailgun/kobold/gguf/GGUFTensorInfo.java
@@ -0,0 +1,278 @@
+package org.studiorailgun.kobold.gguf;
+
+/**
+ * Info on a tensor in the model
+ */
+public class GGUFTensorInfo {
+
+    /**
+     * GGML types
+     */
+    public static enum GGMLType {
+        GGML_TYPE_F32,
+        GGML_TYPE_F16,
+        GGML_TYPE_Q4_0,
+        GGML_TYPE_Q4_1,
+        GGML_TYPE_Q5_0,
+        GGML_TYPE_Q5_1,
+        GGML_TYPE_Q8_0,
+        GGML_TYPE_Q8_1,
+        GGML_TYPE_Q2_K,
+        GGML_TYPE_Q3_K,
+        GGML_TYPE_Q4_K,
+        GGML_TYPE_Q5_K,
+        GGML_TYPE_Q6_K,
+        GGML_TYPE_Q8_K,
+        GGML_TYPE_IQ2_XXS,
+        GGML_TYPE_IQ2_XS,
+        GGML_TYPE_IQ3_XXS,
+        GGML_TYPE_IQ1_S,
+        GGML_TYPE_IQ4_NL,
+        GGML_TYPE_IQ3_S,
+        GGML_TYPE_IQ2_S,
+        GGML_TYPE_IQ4_XS,
+        GGML_TYPE_I8,
+        GGML_TYPE_I16,
+        GGML_TYPE_I32,
+        GGML_TYPE_I64,
+        GGML_TYPE_F64,
+        GGML_TYPE_IQ1_M,
+        GGML_TYPE_COUNT,
+    }
+
+    /**
+     * Maps the type id stored on disk to its enum value -- ids 4 and 5 are deprecated types and have no entry
+     */
+    private static final GGMLType[] TYPES_BY_ID = {
+        GGMLType.GGML_TYPE_F32,     // 0
+        GGMLType.GGML_TYPE_F16,     // 1
+        GGMLType.GGML_TYPE_Q4_0,    // 2
+        GGMLType.GGML_TYPE_Q4_1,    // 3
+        null,                       // 4 (GGML_TYPE_Q4_2)
+        null,                       // 5 (GGML_TYPE_Q4_3)
+        GGMLType.GGML_TYPE_Q5_0,    // 6
+        GGMLType.GGML_TYPE_Q5_1,    // 7
+        GGMLType.GGML_TYPE_Q8_0,    // 8
+        GGMLType.GGML_TYPE_Q8_1,    // 9
+        GGMLType.GGML_TYPE_Q2_K,    // 10
+        GGMLType.GGML_TYPE_Q3_K,    // 11
+        GGMLType.GGML_TYPE_Q4_K,    // 12
+        GGMLType.GGML_TYPE_Q5_K,    // 13
+        GGMLType.GGML_TYPE_Q6_K,    // 14
+        GGMLType.GGML_TYPE_Q8_K,    // 15
+        GGMLType.GGML_TYPE_IQ2_XXS, // 16
+        GGMLType.GGML_TYPE_IQ2_XS,  // 17
+        GGMLType.GGML_TYPE_IQ3_XXS, // 18
+        GGMLType.GGML_TYPE_IQ1_S,   // 19
+        GGMLType.GGML_TYPE_IQ4_NL,  // 20
+        GGMLType.GGML_TYPE_IQ3_S,   // 21
+        GGMLType.GGML_TYPE_IQ2_S,   // 22
+        GGMLType.GGML_TYPE_IQ4_XS,  // 23
+        GGMLType.GGML_TYPE_I8,      // 24
+        GGMLType.GGML_TYPE_I16,     // 25
+        GGMLType.GGML_TYPE_I32,     // 26
+        GGMLType.GGML_TYPE_I64,     // 27
+        GGMLType.GGML_TYPE_F64,     // 28
+        GGMLType.GGML_TYPE_IQ1_M,   // 29
+    };
+
+    /**
+     * The name of the tensor
+     */
+    protected String name;
+
+    /**
+     * The number of dimensions
+     */
+    protected int nDimensions;
+
+    /**
+     * The size of each dimension
+     */
+    protected long[] dimensions;
+
+    /**
+     * The type of the tensor's weights
+     */
+    protected GGMLType type; //stored as an int on disk
+
+    /**
+     * The offset of the tensor's data
+     */
+    protected long offset;
+
+    /** Gets the name of the tensor */
+    public String getName() {
+        return name;
+    }
+
+    /** Gets the number of dimensions */
+    public int getnDimensions() {
+        return nDimensions;
+    }
+
+    /** Gets the size of each dimension */
+    public long[] getDimensions() {
+        return dimensions;
+    }
+
+    /** Gets the type of the tensor's weights */
+    public GGMLType getType() {
+        return type;
+    }
+
+    /** Gets the offset of the tensor's data */
+    public long getOffset() {
+        return offset;
+    }
+
+    /**
+     * Gets the GGML enum type from an int representation
+     * @param enumVal The value represented as an int
+     * @return The actual enum value, or GGML_TYPE_COUNT if the value is not a known type id
+     */
+    protected static GGMLType getType(int enumVal){
+        if(enumVal == 4){
+            throw new Error("Tensor info type of deprecated type (GGML_TYPE_Q4_2)");
+        }
+        if(enumVal == 5){
+            throw new Error("Tensor info type of deprecated type (GGML_TYPE_Q4_3)");
+        }
+        if(enumVal < 0 || enumVal >= TYPES_BY_ID.length){
+            return GGMLType.GGML_TYPE_COUNT;
+        }
+        return TYPES_BY_ID[enumVal];
+    }
+
+    /**
+     * Gets the size in bits of the GGML type
+     * <p>
+     * NOTE(review): the quantized figures are nominal bits-per-weight and several disagree with the block
+     * sizes returned by getBlockSizeInBytes (Q8_K stores 292 bytes per 256 weights) -- confirm before sizing buffers
+     * @param type The type
+     * @return The number of bits that type takes
+     */
+    protected static float getUnitSize(GGMLType type){
+        //reference: https://github.com/ggerganov/llama.cpp/wiki/Tensor-Encoding-Schemes
+        switch(type){
+            case GGML_TYPE_F32:
+            case GGML_TYPE_I32:
+                return 32;
+            case GGML_TYPE_F16:
+            case GGML_TYPE_I16:
+                return 16;
+            case GGML_TYPE_Q4_0:
+            case GGML_TYPE_Q4_1:
+                return 4;
+            case GGML_TYPE_Q5_0:
+            case GGML_TYPE_Q5_1:
+                return 5;
+            case GGML_TYPE_Q8_0:
+            case GGML_TYPE_Q8_1:
+            case GGML_TYPE_Q8_K:
+            case GGML_TYPE_I8:
+                return 8;
+            case GGML_TYPE_F64:
+            case GGML_TYPE_I64:
+                return 64;
+            case GGML_TYPE_Q2_K:
+                return 2.5625f;
+            case GGML_TYPE_Q3_K:
+            case GGML_TYPE_IQ3_S:
+                return 3.4375f;
+            case GGML_TYPE_Q4_K:
+            case GGML_TYPE_IQ4_NL:
+                return 4.5f;
+            case GGML_TYPE_Q5_K:
+                return 5.5f;
+            case GGML_TYPE_Q6_K:
+                return 6.5625f;
+            case GGML_TYPE_IQ2_XXS:
+                return 2.0625f;
+            case GGML_TYPE_IQ2_XS:
+                return 2.31f;
+            case GGML_TYPE_IQ3_XXS:
+                return 3.0625f;
+            case GGML_TYPE_IQ1_S:
+                return 1.5f;
+            case GGML_TYPE_IQ2_S:
+                return 2.5f;
+            case GGML_TYPE_IQ4_XS:
+                return 4.25f;
+            case GGML_TYPE_IQ1_M:
+                return 1.75f;
+            case GGML_TYPE_COUNT:
+            default:
+                throw new Error("Undefined type!");
+        }
+    }
+
+    /**
+     * Gets the block size of a given GGML type in bytes
+     * <p>
+     * A block is the smallest unit the tensor data is stored in: one element for the plain types,
+     * 32 weights for Q4_0 through Q8_1 and IQ4_NL, and 256 weights for the remaining quantized types
+     * @param type The type
+     * @return The size of a block in the tensor data of this type in bytes
+     */
+    protected static float getBlockSizeInBytes(GGMLType type){
+        //reference: https://github.com/ggerganov/llama.cpp/blob/master/ggml/src/ggml-common.h
+        switch(type){
+            case GGML_TYPE_I8:
+                return 1;
+            case GGML_TYPE_F16:
+            case GGML_TYPE_I16:
+                return 2;
+            case GGML_TYPE_F32:
+            case GGML_TYPE_I32:
+                return 4;
+            case GGML_TYPE_F64:
+            case GGML_TYPE_I64:
+                return 8;
+            case GGML_TYPE_Q4_0:
+            case GGML_TYPE_IQ4_NL:
+                return 18; //2 byte scale + 16 bytes of 4-bit quants
+            case GGML_TYPE_Q4_1:
+                return 20; //2 byte scale + 2 byte min + 16 bytes of quants
+            case GGML_TYPE_Q5_0:
+                return 22; //2 byte scale + 4 bytes of high bits + 16 bytes of quants
+            case GGML_TYPE_Q5_1:
+                return 24; //2 byte scale + 2 byte min + 4 bytes of high bits + 16 bytes of quants
+            case GGML_TYPE_Q8_0:
+                return 34; //2 byte scale + 32 bytes of quants
+            case GGML_TYPE_Q8_1:
+                return 36; //2 byte scale + 2 byte sum + 32 bytes of quants
+            case GGML_TYPE_Q2_K:
+                return 84;
+            case GGML_TYPE_Q3_K:
+            case GGML_TYPE_IQ3_S:
+                return 110;
+            case GGML_TYPE_Q4_K:
+                return 144;
+            case GGML_TYPE_Q5_K:
+                return 176;
+            case GGML_TYPE_Q6_K:
+                return 210;
+            case GGML_TYPE_Q8_K:
+                return 292;
+            case GGML_TYPE_IQ2_XXS:
+                return 66;
+            case GGML_TYPE_IQ2_XS:
+                return 74;
+            case GGML_TYPE_IQ3_XXS:
+                return 98;
+            case GGML_TYPE_IQ1_S:
+                return 50;
+            case GGML_TYPE_IQ2_S:
+                return 82;
+            case GGML_TYPE_IQ4_XS:
+                return 136;
+            case GGML_TYPE_IQ1_M:
+                return 56;
+            case GGML_TYPE_COUNT:
+            default:
+                throw new Error("Undefined type!");
+        }
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/kobold/gguf/quant/Quantization.java b/src/main/java/org/studiorailgun/kobold/gguf/quant/Quantization.java
new file mode 100644
index 0000000..2c156a9
--- /dev/null
+++ b/src/main/java/org/studiorailgun/kobold/gguf/quant/Quantization.java
@@ -0,0 +1,248 @@
+package org.studiorailgun.kobold.gguf.quant;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Superblock layouts of the k-quantization schemes
+ */
+public class Quantization {
+
+    //reference for superblock structs: https://github.com/byroneverson/llm.cpp/blob/master/k_quants.h
+    //also maybe reference: https://github.com/ggerganov/llama.cpp/blob/master/ggml/src/ggml-common.h
+    //tensor encoding scheme wiki page: https://github.com/ggerganov/llama.cpp/wiki/Tensor-Encoding-Schemes
+    //thread explaining quantization scheme: https://github.com/ggerganov/llama.cpp/pull/8151
+    //https://github.com/ggerganov/llama.cpp/blob/75af08c475e285888f66556d0f459c533b7deb95/ggml/src/ggml-impl.h
+
+    /**
+     * Super block size
+     */
+    static final int QK_K = 256;
+
+    /**
+     * Scale size
+     */
+    static final int K_SCALE_SIZE = 12;
+
+    /**
+     * A buffer used for type conversion
+     */
+    static ByteBuffer conversionBuffer = ByteBuffer.allocate(8);
+
+    /**
+     * 2-bit quantization
+     */
+    public static class Block_Q2_K implements Superblock {
+
+        /**
+         * scales and mins, quantized with 4 bits
+         */
+        char[] scales = new char[QK_K/16];
+
+        /**
+         * quants
+         */
+        char[] qs = new char[QK_K/4];
+
+        /**
+         * super block scale for quantized scales
+         */
+        short d;
+
+        /**
+         * super block scale for quantized mins
+         */
+        short dmin;
+
+        /**
+         * Gets the size of the superblock
+         * @return The size of the superblock in bytes, counting one byte per array element and two per short
+         */
+        @Override
+        public int getSize(){
+            return QK_K/16 + QK_K/4 + 2 + 2;
+        }
+
+    }
+
+    /**
+     * 3-bit quantization
+     */
+    public static class Block_Q3_K implements Superblock {
+
+        /**
+         * quants - high bit
+         */
+        char[] hmask = new char[QK_K/8];
+
+        /**
+         * quants - low 2 bits
+         */
+        char[] qs = new char[QK_K/4];
+
+        /**
+         * scales, quantized with 6 bits
+         */
+        char[] scales = new char[12];
+
+        /**
+         * super block scale
+         * NOTE(review): the other blocks call their super block scale d -- confirm whether dmin is a misnomer
+         */
+        short dmin;
+
+        /**
+         * Gets the size of the superblock
+         * @return The size of the superblock in bytes, counting one byte per array element and two per short
+         */
+        @Override
+        public int getSize(){
+            return QK_K/8 + QK_K/4 + 12 + 2;
+        }
+
+    }
+
+    /**
+     * 4-bit quantization
+     */
+    public static class Block_Q4_K implements Superblock {
+
+        /**
+         * Super block scale for quantized scales
+         */
+        short d;
+
+        /**
+         * Super block scale for quantized mins
+         */
+        short dmin;
+
+        /**
+         * Scales and mins, quantized with 6 bits
+         */
+        char[] scales = new char[K_SCALE_SIZE];
+
+        /**
+         * 4-bit quants
+         */
+        char[] qs = new char[QK_K/2];
+
+        /**
+         * Gets the size of the superblock
+         * @return The size of the superblock in bytes, counting one byte per array element and two per short
+         */
+        @Override
+        public int getSize(){
+            return 2 + 2 + K_SCALE_SIZE + QK_K/2;
+        }
+
+    }
+
+    /**
+     * 5-bit quantization
+     */
+    public static class Block_Q5_K implements Superblock {
+
+        /**
+         * Super block scale for quantized scales
+         */
+        short d;
+
+        /**
+         * Super block scale for quantized mins
+         */
+        short dmin;
+
+        /**
+         * Scales and mins, quantized with 6 bits
+         */
+        char[] scales = new char[K_SCALE_SIZE];
+
+        /**
+         * quants, high bit
+         */
+        char[] qh = new char[QK_K/8];
+
+        /**
+         * quants, low 4 bits
+         */
+        char[] qs = new char[QK_K/2];
+
+        /**
+         * Gets the size of the superblock
+         * @return The size of the superblock in bytes, counting one byte per array element and two per short
+         */
+        @Override
+        public int getSize(){
+            return 2 + 2 + K_SCALE_SIZE + QK_K/8 + QK_K/2;
+        }
+
+    }
+
+    /**
+     * 6-bit quantization
+     */
+    public static class Block_Q6_K implements Superblock {
+
+        /**
+         * Quants, lower 4 bits
+         */
+        char[] ql = new char[QK_K/2];
+
+        /**
+         * Quants, upper 2 bits
+         */
+        char[] qh = new char[QK_K/4];
+
+        /**
+         * scales, quantized with 8 bits
+         */
+        char[] scales = new char[QK_K/16];
+
+        /**
+         * super block scale
+         */
+        short d;
+
+        /**
+         * Gets the size of the superblock
+         * @return The size of the superblock in bytes, counting one byte per array element and two per short
+         */
+        @Override
+        public int getSize(){
+            return QK_K/2 + QK_K/4 + QK_K/16 + 2;
+        }
+
+    }
+
+    /**
+     * 8-bit quantization
+     */
+    public static class Block_Q8_K implements Superblock {
+
+        /**
+         * Delta
+         */
+        float d;
+
+        /**
+         * quants
+         */
+        char[] qs = new char[QK_K];
+
+        /**
+         * sum of the quants in groups of 16
+         */
+        short[] bsums = new short[QK_K/16];
+
+        /**
+         * Gets the size of the superblock
+         * @return The size of the superblock in bytes: a four byte float, one byte per quant and two per sum
+         */
+        @Override
+        public int getSize(){
+            return 4 + QK_K + 2 * (QK_K/16);
+        }
+
+    }
+
+}
diff --git a/src/main/java/org/studiorailgun/kobold/gguf/quant/Superblock.java b/src/main/java/org/studiorailgun/kobold/gguf/quant/Superblock.java
new file mode 100644
index 0000000..11c1754
--- /dev/null
+++ b/src/main/java/org/studiorailgun/kobold/gguf/quant/Superblock.java
@@ -0,0 +1,14 @@
+package org.studiorailgun.kobold.gguf.quant;
+
+/**
+ * A superblock of quantized weights
+ */
+public interface Superblock {
+
+    /**
+     * Gets the size of the superblock
+     * @return The size of the superblock
+     */
+    public int getSize();
+
+}