gguf header loading
All checks were successful
studiorailgun/trpg/pipeline/head This commit looks good

This commit is contained in:
austin 2025-01-07 13:03:58 -05:00
parent 8e5546d772
commit 56594eb667
10 changed files with 1251 additions and 0 deletions

1
llama.log Normal file
View File

@ -0,0 +1 @@
[1736213723] warming up the model with an empty run

View File

@ -1,6 +1,16 @@
package org.studiorailgun; package org.studiorailgun;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import org.studiorailgun.interact.GameLoop; import org.studiorailgun.interact.GameLoop;
import org.studiorailgun.kobold.gguf.GGUFHeader;
import org.studiorailgun.kobold.gguf.GGUFLoader;
import org.studiorailgun.kobold.gguf.GGUFMetadataKV;
import org.studiorailgun.kobold.gguf.GGUFModel;
import org.studiorailgun.kobold.gguf.GGUFTensorInfo;
/** /**
* The main class * The main class
@ -11,6 +21,25 @@ public class Main {
* The main method * The main method
*/ */
public static void main(String[] args){ public static void main(String[] args){
try {
GGUFLoader loader = new GGUFLoader();
GGUFModel model = loader.load("C:\\Users\\satellite\\Documents\\ai\\koboldcpp\\Fimbulvetr-Kuro-Lotus-10.7B-Q6_K.gguf");
System.out.println("Metadata: ");
GGUFHeader header = model.getHeader();
for(GGUFMetadataKV pair : header.getMetadataPairs()){
System.out.println(pair.getKey() + " - " + pair.getValue_type() + " - " + pair.getValue());
}
System.out.println("\n");
for(GGUFTensorInfo tensorInfo : model.getTensorInfos()){
System.out.println(tensorInfo.getName() + " - " + tensorInfo.getType() + " - " + tensorInfo.getOffset());
}
System.exit(0);
} catch (IOException e) {
e.printStackTrace();
}
GameLoop.main(); GameLoop.main();
} }

View File

@ -0,0 +1,43 @@
package org.studiorailgun.kobold.gguf;
import java.util.LinkedList;
import java.util.List;
/**
 * Holds the values read from the header section of a gguf file
 */
public class GGUFHeader {

    /**
     * The magic number read from the start of the file
     */
    protected int magic;

    /**
     * The version number read from the file
     */
    protected int version;

    /**
     * The number of tensors the file declares
     */
    protected long tensorCount;

    /**
     * The number of metadata key-value pairs the file declares
     */
    protected long metadataKVCount;

    /**
     * The metadata key-value pairs, in the order they were added
     */
    protected List<GGUFMetadataKV> metadataPairs = new LinkedList<>();

    /**
     * Gets the magic number
     * @return The magic number
     */
    public int getMagic() {
        return this.magic;
    }

    /**
     * Gets the version
     * @return The version
     */
    public int getVersion() {
        return this.version;
    }

    /**
     * Gets the declared number of tensors
     * @return The tensor count
     */
    public long getTensorCount() {
        return this.tensorCount;
    }

    /**
     * Gets the declared number of metadata key-value pairs
     * @return The metadata pair count
     */
    public long getMetadataKVCount() {
        return this.metadataKVCount;
    }

    /**
     * Gets the metadata key-value pairs
     * @return The live (mutable) list of pairs
     */
    public List<GGUFMetadataKV> getMetadataPairs() {
        return this.metadataPairs;
    }

}

View File

@ -0,0 +1,415 @@
package org.studiorailgun.kobold.gguf;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import org.studiorailgun.kobold.gguf.GGUFMetadataKV.GGUFMetadataValue;
/**
 * Loads a gguf format model.
 * <p>
 * A loader instance keeps per-load state (byte counter, alignment, scratch buffer), so a single
 * instance must not be used from several threads at once. It may be reused for consecutive loads.
 * </p>
 */
public class GGUFLoader {

    //gguf core explanation: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md

    /**
     * The default alignment for the file, in bytes
     */
    static final int DEFAULT_ALIGNMENT = 32;

    /**
     * The metadata key which, when present, overrides the default alignment
     */
    static final String ALIGNMENT_KEY = "general.alignment";

    /**
     * The bytes 'G','G','U','F' interpreted as a little-endian int
     */
    static final int GGUF_MAGIC = 0x46554747;

    /**
     * Used for converting bytes read in to little endian
     */
    ByteBuffer readerBuff = ByteBuffer.allocate(8);

    /**
     * The alignment of the file, in bytes
     */
    int alignment = DEFAULT_ALIGNMENT;

    /**
     * The number of bytes read -- used for keeping track of alignment
     */
    long bytesRead = 0;

    /**
     * Constructor
     */
    public GGUFLoader(){
        readerBuff.order(ByteOrder.LITTLE_ENDIAN);
    }

    /**
     * Loads the gguf model at the specified path
     * @param path The path
     * @return The model. Tensor data decoding is not implemented yet, so the tensor data list is
     * filled with one null entry per tensor.
     * @throws IOException Thrown if the file cannot be read, ends early, or is not a well formed gguf file
     */
    public GGUFModel load(String path) throws IOException {
        //reset per-load state so that the loader can be reused for several files
        this.bytesRead = 0;
        this.alignment = DEFAULT_ALIGNMENT;

        //try-with-resources guarantees the stream is closed even when parsing fails
        try (InputStream stream = Files.newInputStream(new File(path).toPath())) {
            GGUFModel rVal = new GGUFModel();

            //read the header
            rVal.header = this.readHeader(stream);
            this.alignment = this.resolveAlignment(rVal.header);

            //read the tensor info
            for(long i = 0; i < rVal.header.tensorCount; i++){
                rVal.tensorInfos.add(this.readTensorInfo(stream));
            }

            //a file without tensors has no data section, so there is nothing to pad to
            if(!rVal.tensorInfos.isEmpty()){
                //read padding -- the data section starts at the next multiple of the alignment (in bytes)
                long misalignment = this.bytesRead % this.alignment;
                if(misalignment != 0){
                    this.skipFully(stream, this.alignment - misalignment);
                }

                //tensor offsets are relative to this position, not to the start of the file
                long tensorDataStart = this.bytesRead;

                //read the tensor data
                for(GGUFTensorInfo tensorInfo : rVal.tensorInfos){
                    rVal.tensorData.add(this.readTensorData(stream, tensorInfo, tensorDataStart));
                }
            }

            return rVal;
        }
    }

    /**
     * Works out the alignment to use for a file from its metadata
     * @param header The header that was read from the file
     * @return The value of the alignment key if it is present as a positive 32 bit integer, otherwise the default
     */
    private int resolveAlignment(GGUFHeader header){
        for(GGUFMetadataKV pair : header.metadataPairs){
            if(ALIGNMENT_KEY.equals(pair.key) && pair.value instanceof Integer){
                int declared = (Integer)pair.value;
                if(declared > 0){
                    return declared;
                }
            }
        }
        return DEFAULT_ALIGNMENT;
    }

    /**
     * Reads a GGUF header from an input stream
     * @param stream The stream
     * @return The header
     * @throws IOException Thrown if the stream fails to read or the header is malformed
     */
    private GGUFHeader readHeader(InputStream stream) throws IOException {
        GGUFHeader rVal = new GGUFHeader();
        rVal.magic = this.streamReadInt(stream);
        if(rVal.magic != GGUF_MAGIC){
            //bail out early instead of interpreting an unrelated file as counts and lengths
            throw new IOException("Not a GGUF file -- unexpected magic number 0x" + Integer.toHexString(rVal.magic));
        }
        rVal.version = this.streamReadInt(stream);
        rVal.tensorCount = this.streamReadLong(stream);
        rVal.metadataKVCount = this.streamReadLong(stream);
        if(rVal.tensorCount < 0 || rVal.metadataKVCount < 0){
            //the counts are unsigned on disk; a negative value here means a count no real file can have
            throw new IOException(
                "Invalid GGUF header counts -- tensors: " + Long.toUnsignedString(rVal.tensorCount)
                + " metadata pairs: " + Long.toUnsignedString(rVal.metadataKVCount)
            );
        }
        System.out.println(rVal.magic + " " + rVal.version + " " + rVal.tensorCount + " " + rVal.metadataKVCount);

        //read in the metadata kv pairs
        for(long i = 0; i < rVal.metadataKVCount; i++){
            rVal.metadataPairs.add(this.readMetadataPair(stream));
        }
        return rVal;
    }

    /**
     * Reads in a GGUF metadata pair
     * @param stream The stream
     * @return The pair. 8 bit values (including bools) are stored as a char holding the raw byte value.
     * @throws IOException Thrown if the stream fails to read or the value type is unknown
     */
    private GGUFMetadataKV readMetadataPair(InputStream stream) throws IOException {
        GGUFMetadataKV pair = new GGUFMetadataKV();
        pair.key = this.readGGUFString(stream);
        int typeRaw = this.streamReadInt(stream);
        switch(typeRaw){
            case 0: {
                pair.value_type = GGUFMetadataValue.UINT8;
                pair.value = this.streamReadChar(stream);
            } break;
            case 1: {
                pair.value_type = GGUFMetadataValue.INT8;
                pair.value = this.streamReadChar(stream);
            } break;
            case 2: {
                pair.value_type = GGUFMetadataValue.UINT16;
                pair.value = this.streamReadShort(stream);
            } break;
            case 3: {
                pair.value_type = GGUFMetadataValue.INT16;
                pair.value = this.streamReadShort(stream);
            } break;
            case 4: {
                pair.value_type = GGUFMetadataValue.UINT32;
                pair.value = this.streamReadInt(stream);
            } break;
            case 5: {
                pair.value_type = GGUFMetadataValue.INT32;
                pair.value = this.streamReadInt(stream);
            } break;
            case 6: {
                pair.value_type = GGUFMetadataValue.FLOAT32;
                pair.value = this.streamReadFloat(stream);
            } break;
            case 7: {
                pair.value_type = GGUFMetadataValue.BOOL;
                pair.value = this.streamReadChar(stream);
            } break;
            case 8: {
                pair.value_type = GGUFMetadataValue.STRING;
                pair.value = this.readGGUFString(stream);
            } break;
            case 9: {
                pair.value_type = GGUFMetadataValue.ARRAY;
                pair.value = this.readMetadataArray(stream);
            } break;
            case 10: {
                pair.value_type = GGUFMetadataValue.UINT64;
                pair.value = this.streamReadLong(stream);
            } break;
            case 11: {
                pair.value_type = GGUFMetadataValue.INT64;
                pair.value = this.streamReadLong(stream);
            } break;
            case 12: {
                pair.value_type = GGUFMetadataValue.DOUBLE;
                pair.value = this.streamReadDouble(stream);
            } break;
            default: {
                throw new IOException("Invalid metadata pair data type! " + typeRaw);
            }
        }
        return pair;
    }

    /**
     * Reads a metadata array value
     * @param stream The stream
     * @return The array value -- a primitive array, a String array, or an Object array of nested arrays
     * @throws IOException Thrown if the stream has an io exception or the array is malformed
     */
    private Object readMetadataArray(InputStream stream) throws IOException {
        int typeRaw = this.streamReadInt(stream);
        int len = this.readLength(stream, "metadata array");
        switch(typeRaw){
            case 0:
            case 1: {
                char[] rVal = new char[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadChar(stream);
                }
                return rVal;
            }
            case 2:
            case 3: {
                short[] rVal = new short[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadShort(stream);
                }
                return rVal;
            }
            case 4:
            case 5: {
                int[] rVal = new int[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadInt(stream);
                }
                return rVal;
            }
            case 6: {
                float[] rVal = new float[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadFloat(stream);
                }
                return rVal;
            }
            case 7: {
                boolean[] rVal = new boolean[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadChar(stream) > 0;
                }
                return rVal;
            }
            case 8: {
                String[] rVal = new String[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.readGGUFString(stream);
                }
                return rVal;
            }
            case 9: {
                Object[] rVal = new Object[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.readMetadataArray(stream);
                }
                return rVal;
            }
            case 10:
            case 11: {
                long[] rVal = new long[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadLong(stream);
                }
                return rVal;
            }
            case 12: {
                double[] rVal = new double[len];
                for(int i = 0; i < len; i++){
                    rVal[i] = this.streamReadDouble(stream);
                }
                return rVal;
            }
            default: {
                throw new IOException("Failed to read metadata array -- invalid type! " + typeRaw);
            }
        }
    }

    /**
     * Reads info on a tensor from the file
     * @param stream The stream
     * @return The tensor info
     * @throws IOException Thrown if the stream fails to read or the info is malformed
     */
    private GGUFTensorInfo readTensorInfo(InputStream stream) throws IOException {
        GGUFTensorInfo rVal = new GGUFTensorInfo();
        rVal.name = this.readGGUFString(stream);
        rVal.nDimensions = this.streamReadInt(stream);
        if(rVal.nDimensions < 0){
            //unsigned on disk; guard so that the array allocation below cannot fail with a negative size
            throw new IOException("Invalid dimension count for tensor " + rVal.name + ": " + Integer.toUnsignedString(rVal.nDimensions));
        }
        rVal.dimensions = new long[rVal.nDimensions];
        for(int i = 0; i < rVal.nDimensions; i++){
            rVal.dimensions[i] = this.streamReadLong(stream);
        }
        rVal.type = GGUFTensorInfo.getType(this.streamReadInt(stream));
        rVal.offset = this.streamReadLong(stream);
        return rVal;
    }

    /**
     * Advances the stream to the start of a tensor's data.
     * Decoding the data itself is not implemented yet.
     * @param stream The stream
     * @param tensorInfo The tensor metadata
     * @param tensorDataStart The position in the file, in bytes, where the tensor data section begins
     * @return Always null for now
     * @throws IOException Thrown if the stream fails to read
     */
    private ByteBuffer readTensorData(InputStream stream, GGUFTensorInfo tensorInfo, long tensorDataStart) throws IOException {
        //read up to the offset -- the offset is relative to the start of the tensor data section
        long target = tensorDataStart + tensorInfo.getOffset();
        if(target > this.bytesRead){
            this.skipFully(stream, target - this.bytesRead);
        }
        float bitsPerWeight = GGUFTensorInfo.getUnitSize(tensorInfo.getType());
        System.out.println("Bits per weight: " + bitsPerWeight);
        return null;
    }

    /**
     * Skips an exact number of bytes, keeping the byte counter in sync
     * @param stream The stream
     * @param count The number of bytes to skip
     * @throws IOException Thrown if the stream fails to read or ends before all bytes were skipped
     */
    private void skipFully(InputStream stream, long count) throws IOException {
        long remaining = count;
        while(remaining > 0){
            long skipped = stream.skip(remaining);
            if(skipped <= 0){
                //skip is allowed to skip nothing without being at the end of the stream,
                //so consume a single byte to tell the two situations apart
                if(stream.read() < 0){
                    throw new IOException("Unexpected end of GGUF file with " + remaining + " bytes left to skip");
                }
                skipped = 1;
            }
            remaining -= skipped;
            this.bytesRead += skipped;
        }
    }

    /**
     * Reads an exact number of bytes into the start of the conversion buffer
     * @param stream The stream
     * @param count The number of bytes to read, at most the capacity of the conversion buffer
     * @throws IOException Thrown if the stream fails to read or ends before all bytes were read
     */
    private void fillReaderBuff(InputStream stream, int count) throws IOException {
        byte[] bytes = stream.readNBytes(count);
        if(bytes.length != count){
            throw new IOException("Unexpected end of GGUF file -- wanted " + count + " bytes but only " + bytes.length + " were left");
        }
        readerBuff.clear();
        readerBuff.put(bytes);
        this.bytesRead += count;
    }

    /**
     * Reads a 64 bit length from the stream and narrows it to an int
     * @param stream The stream
     * @param what What the length belongs to, used in the error message
     * @return The length
     * @throws IOException Thrown if the stream fails to read or the length does not fit in a Java array
     */
    private int readLength(InputStream stream, String what) throws IOException {
        long raw = this.streamReadLong(stream);
        if(raw < 0 || raw > Integer.MAX_VALUE){
            throw new IOException("Unsupported " + what + " length: " + Long.toUnsignedString(raw));
        }
        return (int)raw;
    }

    /**
     * Reads a single byte from the stream as a char
     * @param stream The stream
     * @return A char holding the unsigned value of the byte (0-255)
     * @throws IOException Thrown if the stream fails to read
     */
    private char streamReadChar(InputStream stream) throws IOException {
        this.fillReaderBuff(stream, 1);
        //only one byte was read, so only one byte may be decoded -- decoding a two byte char here
        //would mix in a stale byte left over from an earlier read
        return (char)(readerBuff.get(0) & 0xFF);
    }

    /**
     * Reads a little-endian short from the stream
     * @param stream The stream
     * @return The short
     * @throws IOException Thrown if the stream fails to read
     */
    private short streamReadShort(InputStream stream) throws IOException {
        this.fillReaderBuff(stream, 2);
        return readerBuff.getShort(0);
    }

    /**
     * Reads a little-endian int from the stream
     * @param stream The stream
     * @return The int
     * @throws IOException Thrown if the stream fails to read
     */
    private int streamReadInt(InputStream stream) throws IOException {
        this.fillReaderBuff(stream, 4);
        return readerBuff.getInt(0);
    }

    /**
     * Reads a little-endian float from the stream
     * @param stream The stream
     * @return The float
     * @throws IOException Thrown if the stream fails to read
     */
    private float streamReadFloat(InputStream stream) throws IOException {
        this.fillReaderBuff(stream, 4);
        return readerBuff.getFloat(0);
    }

    /**
     * Reads a little-endian long from the stream
     * @param stream The stream
     * @return The long
     * @throws IOException Thrown if the stream fails to read
     */
    private long streamReadLong(InputStream stream) throws IOException {
        this.fillReaderBuff(stream, 8);
        return readerBuff.getLong(0);
    }

    /**
     * Reads a little-endian double from the stream
     * @param stream The stream
     * @return The double
     * @throws IOException Thrown if the stream fails to read
     */
    private double streamReadDouble(InputStream stream) throws IOException {
        this.fillReaderBuff(stream, 8);
        return readerBuff.getDouble(0);
    }

    /**
     * Reads a GGUF-format string (a 64 bit length followed by that many bytes of UTF-8)
     * @param stream The stream
     * @return The Java string containing the data from the GGUF-format string
     * @throws IOException Thrown if the stream fails to read or ends inside the string
     */
    private String readGGUFString(InputStream stream) throws IOException {
        int length = this.readLength(stream, "string");
        byte[] bytes = stream.readNBytes(length);
        if(bytes.length != length){
            throw new IOException("Unexpected end of GGUF file -- string of " + length + " bytes was cut off after " + bytes.length);
        }
        this.bytesRead += length;
        return new String(bytes, StandardCharsets.UTF_8);
    }

}

View File

@ -0,0 +1,57 @@
package org.studiorailgun.kobold.gguf;
/**
 * One key-value entry of gguf metadata
 */
public class GGUFMetadataKV {

    /**
     * The kinds of value a metadata entry can hold
     */
    public enum GGUFMetadataValue {
        UINT8,
        INT8,
        UINT16,
        INT16,
        UINT32,
        INT32,
        FLOAT32,
        BOOL,
        STRING,
        ARRAY,
        UINT64,
        INT64,
        DOUBLE
    }

    /**
     * The key this entry is stored under
     */
    protected String key;

    /**
     * The kind of value this entry holds
     */
    protected GGUFMetadataValue value_type; //stored as an int on disk

    /**
     * The value itself
     */
    protected Object value;

    /**
     * Gets the key
     * @return The key
     */
    public String getKey() {
        return this.key;
    }

    /**
     * Gets the kind of value held
     * @return The value type
     */
    public GGUFMetadataValue getValue_type() {
        return this.value_type;
    }

    /**
     * Gets the value
     * @return The value
     */
    public Object getValue() {
        return this.value;
    }

}

View File

@ -0,0 +1,51 @@
package org.studiorailgun.kobold.gguf;
import java.nio.ByteBuffer;
import java.util.LinkedList;
import java.util.List;
/**
 * The in-memory form of a gguf file: header, tensor infos, padding, and tensor data
 */
public class GGUFModel {

    /**
     * The file header
     */
    protected GGUFHeader header;

    /**
     * The info entries describing each tensor
     */
    protected List<GGUFTensorInfo> tensorInfos = new LinkedList<>();

    /**
     * Padding to the nearest multiple of ALIGNMENT
     */
    protected char[] _padding;

    /**
     * The data of each tensor
     */
    protected List<ByteBuffer> tensorData = new LinkedList<>();

    /**
     * Gets the header
     * @return The header
     */
    public GGUFHeader getHeader() {
        return this.header;
    }

    /**
     * Gets the tensor infos
     * @return The live (mutable) list of tensor infos
     */
    public List<GGUFTensorInfo> getTensorInfos() {
        return this.tensorInfos;
    }

    /**
     * Gets the padding
     * @return The padding array
     */
    public char[] get_padding() {
        return this._padding;
    }

    /**
     * Gets the tensor data
     * @return The live (mutable) list of tensor data buffers
     */
    public List<ByteBuffer> getTensorData() {
        return this.tensorData;
    }

}

View File

@ -0,0 +1,18 @@
package org.studiorailgun.kobold.gguf;
/**
 * Holder for a string in gguf format: a length paired with the text
 */
public class GGUFString {

    /**
     * How long the string is
     */
    protected long len;

    /**
     * The text of the string
     */
    protected String string;

}

View File

@ -0,0 +1,378 @@
package org.studiorailgun.kobold.gguf;
/**
 * Info on a tensor in the model
 */
public class GGUFTensorInfo {

    /**
     * GGML types.
     * The declaration order does NOT match the on-disk ids (ids 4 and 5 are deprecated and have no
     * constant here), so always convert with {@link #getType(int)} instead of using ordinals.
     */
    public static enum GGMLType {
        GGML_TYPE_F32,
        GGML_TYPE_F16,
        GGML_TYPE_Q4_0,
        GGML_TYPE_Q4_1,
        GGML_TYPE_Q5_0,
        GGML_TYPE_Q5_1,
        GGML_TYPE_Q8_0,
        GGML_TYPE_Q8_1,
        GGML_TYPE_Q2_K,
        GGML_TYPE_Q3_K,
        GGML_TYPE_Q4_K,
        GGML_TYPE_Q5_K,
        GGML_TYPE_Q6_K,
        GGML_TYPE_Q8_K,
        GGML_TYPE_IQ2_XXS,
        GGML_TYPE_IQ2_XS,
        GGML_TYPE_IQ3_XXS,
        GGML_TYPE_IQ1_S,
        GGML_TYPE_IQ4_NL,
        GGML_TYPE_IQ3_S,
        GGML_TYPE_IQ2_S,
        GGML_TYPE_IQ4_XS,
        GGML_TYPE_I8,
        GGML_TYPE_I16,
        GGML_TYPE_I32,
        GGML_TYPE_I64,
        GGML_TYPE_F64,
        GGML_TYPE_IQ1_M,
        GGML_TYPE_COUNT,
    }

    /**
     * The name of the tensor
     */
    protected String name;

    /**
     * The number of dimensions of the tensor
     */
    protected int nDimensions;

    /**
     * The size of each dimension
     */
    protected long[] dimensions;

    /**
     * The type of the tensor's data
     */
    protected GGMLType type; //stored as an int on disk

    /**
     * The offset of the tensor's data
     */
    protected long offset;

    /**
     * Gets the name of the tensor
     * @return The name
     */
    public String getName() {
        return name;
    }

    /**
     * Gets the number of dimensions
     * @return The number of dimensions
     */
    public int getnDimensions() {
        return nDimensions;
    }

    /**
     * Gets the size of each dimension
     * @return The dimensions array (not a copy)
     */
    public long[] getDimensions() {
        return dimensions;
    }

    /**
     * Gets the type of the tensor's data
     * @return The type
     */
    public GGMLType getType() {
        return type;
    }

    /**
     * Gets the offset of the tensor's data
     * @return The offset
     */
    public long getOffset() {
        return offset;
    }

    /**
     * Gets the GGML enum type from an int representation
     * @param enumVal The value represented as an int
     * @return The actual enum value, or GGML_TYPE_COUNT when the int is not a known type id
     * @throws Error Thrown if the int is one of the deprecated type ids (4 or 5)
     */
    protected static GGMLType getType(int enumVal){
        switch(enumVal){
            case 0: return GGMLType.GGML_TYPE_F32;
            case 1: return GGMLType.GGML_TYPE_F16;
            case 2: return GGMLType.GGML_TYPE_Q4_0;
            case 3: return GGMLType.GGML_TYPE_Q4_1;
            case 4: throw new Error("Tensor info type of deprecated type (GGML_TYPE_Q4_2)");
            case 5: throw new Error("Tensor info type of deprecated type (GGML_TYPE_Q4_3)");
            case 6: return GGMLType.GGML_TYPE_Q5_0;
            case 7: return GGMLType.GGML_TYPE_Q5_1;
            case 8: return GGMLType.GGML_TYPE_Q8_0;
            case 9: return GGMLType.GGML_TYPE_Q8_1;
            case 10: return GGMLType.GGML_TYPE_Q2_K;
            case 11: return GGMLType.GGML_TYPE_Q3_K;
            case 12: return GGMLType.GGML_TYPE_Q4_K;
            case 13: return GGMLType.GGML_TYPE_Q5_K;
            case 14: return GGMLType.GGML_TYPE_Q6_K;
            case 15: return GGMLType.GGML_TYPE_Q8_K;
            case 16: return GGMLType.GGML_TYPE_IQ2_XXS;
            case 17: return GGMLType.GGML_TYPE_IQ2_XS;
            case 18: return GGMLType.GGML_TYPE_IQ3_XXS;
            case 19: return GGMLType.GGML_TYPE_IQ1_S;
            case 20: return GGMLType.GGML_TYPE_IQ4_NL;
            case 21: return GGMLType.GGML_TYPE_IQ3_S;
            case 22: return GGMLType.GGML_TYPE_IQ2_S;
            case 23: return GGMLType.GGML_TYPE_IQ4_XS;
            case 24: return GGMLType.GGML_TYPE_I8;
            case 25: return GGMLType.GGML_TYPE_I16;
            case 26: return GGMLType.GGML_TYPE_I32;
            case 27: return GGMLType.GGML_TYPE_I64;
            case 28: return GGMLType.GGML_TYPE_F64;
            case 29: return GGMLType.GGML_TYPE_IQ1_M;
            default: return GGMLType.GGML_TYPE_COUNT;
        }
    }

    /**
     * Gets the size in bits of the GGML type
     * @param type The type
     * @return The number of bits that type takes per weight
     * @throws Error Thrown if the type is GGML_TYPE_COUNT (ie not a real type)
     */
    protected static float getUnitSize(GGMLType type){
        //reference: https://github.com/ggerganov/llama.cpp/wiki/Tensor-Encoding-Schemes
        switch(type){
            case GGML_TYPE_F32: return 32;
            case GGML_TYPE_F16: return 16;
            case GGML_TYPE_Q4_1:
            case GGML_TYPE_Q4_0: return 4;
            case GGML_TYPE_Q5_0:
            case GGML_TYPE_Q5_1: return 5;
            case GGML_TYPE_Q8_0:
            case GGML_TYPE_Q8_1: return 8;
            case GGML_TYPE_Q2_K: return 2.5625f;
            case GGML_TYPE_Q3_K: return 3.4375f;
            case GGML_TYPE_Q4_K: return 4.5f;
            case GGML_TYPE_Q5_K: return 5.5f;
            case GGML_TYPE_Q6_K: return 6.5625f;
            case GGML_TYPE_Q8_K: return 8;
            case GGML_TYPE_IQ2_XXS: return 2.0625f;
            case GGML_TYPE_IQ2_XS: return 2.31f;
            case GGML_TYPE_IQ3_XXS: return 3.0625f;
            case GGML_TYPE_IQ1_S: return 1.5f;
            case GGML_TYPE_IQ4_NL: return 4.5f;
            case GGML_TYPE_IQ3_S: return 3.4375f;
            case GGML_TYPE_IQ2_S: return 2.5f;
            case GGML_TYPE_IQ4_XS: return 4.25f;
            case GGML_TYPE_I8: return 8;
            case GGML_TYPE_I16: return 16;
            case GGML_TYPE_I32: return 32;
            case GGML_TYPE_F64:
            case GGML_TYPE_I64: return 64;
            case GGML_TYPE_IQ1_M: return 1.75f;
            case GGML_TYPE_COUNT:
            default: {
                throw new Error("Undefined type!");
            }
        }
    }

    /**
     * Gets the block size of a given GGML type in bytes.
     * For the plain numeric types a block is a single element. For the quantized types a block is
     * the whole quantization struct (scales plus packed quants), so the result is a whole number of
     * bytes and not a per-weight figure.
     * @param type The type
     * @return The size of a block in the tensor data of this type in bytes
     * @throws Error Thrown if the type is GGML_TYPE_COUNT (ie not a real type)
     */
    protected static float getBlockSizeInBytes(GGMLType type){
        //reference: https://github.com/ggerganov/llama.cpp/blob/master/ggml/src/ggml-common.h
        switch(type){
            //regular types -- one element per block
            case GGML_TYPE_I8: return 1;
            case GGML_TYPE_F16:
            case GGML_TYPE_I16: return 2;
            case GGML_TYPE_F32:
            case GGML_TYPE_I32: return 4;
            case GGML_TYPE_F64:
            case GGML_TYPE_I64: return 8;

            //legacy quants -- 32 weights per block
            case GGML_TYPE_Q4_0: return 18;  // 2 (d) + 16 (quants)
            case GGML_TYPE_Q4_1: return 20;  // 2 (d) + 2 (m) + 16 (quants)
            case GGML_TYPE_Q5_0: return 22;  // 2 (d) + 4 (high bits) + 16 (quants)
            case GGML_TYPE_Q5_1: return 24;  // 2 (d) + 2 (m) + 4 (high bits) + 16 (quants)
            case GGML_TYPE_Q8_0: return 34;  // 2 (d) + 32 (quants)
            case GGML_TYPE_Q8_1: return 36;  // 2 (d) + 2 (s) + 32 (quants)

            //k-quants -- 256 weights per super block
            case GGML_TYPE_Q2_K: return 84;  // 16 (scales) + 64 (quants) + 2 (d) + 2 (dmin)
            case GGML_TYPE_Q3_K: return 110; // 32 (high bits) + 64 (quants) + 12 (scales) + 2 (d)
            case GGML_TYPE_Q4_K: return 144; // 2 (d) + 2 (dmin) + 12 (scales) + 128 (quants)
            case GGML_TYPE_Q5_K: return 176; // 2 (d) + 2 (dmin) + 12 (scales) + 32 (high bits) + 128 (quants)
            case GGML_TYPE_Q6_K: return 210; // 128 (low bits) + 64 (high bits) + 16 (scales) + 2 (d)
            case GGML_TYPE_Q8_K: return 292; // 4 (d) + 256 (quants) + 32 (block sums)

            //i-quants -- 256 weights per super block, except IQ4_NL which uses 32
            case GGML_TYPE_IQ2_XXS: return 66;
            case GGML_TYPE_IQ2_XS: return 74;
            case GGML_TYPE_IQ3_XXS: return 98;
            case GGML_TYPE_IQ1_S: return 50;
            case GGML_TYPE_IQ4_NL: return 18;
            case GGML_TYPE_IQ3_S: return 110;
            case GGML_TYPE_IQ2_S: return 82;
            case GGML_TYPE_IQ4_XS: return 136;
            case GGML_TYPE_IQ1_M: return 56;

            case GGML_TYPE_COUNT:
            default: {
                throw new Error("Undefined type!");
            }
        }
    }

}

View File

@ -0,0 +1,245 @@
package org.studiorailgun.kobold.gguf.quant;
import java.nio.ByteBuffer;
/**
* 2-bit quanization
*/
public class Quantization {
//reference for superblock structs: https://github.com/byroneverson/llm.cpp/blob/master/k_quants.h
//also maybe reference: https://github.com/ggerganov/llama.cpp/blob/master/ggml/src/ggml-common.h
//tensor encoding scheme wiki page: https://github.com/ggerganov/llama.cpp/wiki/Tensor-Encoding-Schemes
//thread explaining quantization scheme: https://github.com/ggerganov/llama.cpp/pull/8151
//https://github.com/ggerganov/llama.cpp/blob/75af08c475e285888f66556d0f459c533b7deb95/ggml/src/ggml-impl.h
/**
* Super block size
*/
static final int QK_K = 256;
/**
* Scale size
*/
static final int K_SCALE_SIZE = 12;
/**
* A buffer used for type conversion
*/
static ByteBuffer conversionBuffer = ByteBuffer.allocate(8);
/**
* 2-bit quantization
*/
public static class Block_Q2_K implements Superblock {
/**
* scales and mins, quantized with 4 bits
*/
char[] scales = new char[QK_K/16];
/**
* quants
*/
char[] qs = new char[QK_K/4];
/**
* super block scale for quantized scales
*/
short d;
/**
* super block scale for quantized mins
*/
short dmin;
/**
* Gets the size of the superblock
* @return The size of the superblock
*/
public int getSize(){
return 0;
}
}
/**
* 3-bit quantization
*/
public static class Block_Q3_K implements Superblock {
/**
* quants - high bit
*/
char[] hmask = new char[QK_K/8];
/**
* quants - low 2 bits
*/
char[] qs = new char[QK_K/4];
/**
* scales, quantized with 6 bits
*/
char[] scales = new char[12];
/**
* super block scale
*/
short dmin;
/**
* Gets the size of the superblock
* @return The size of the superblock
*/
public int getSize(){
return 0;
}
}
/**
* 4-bit quantization
*/
public static class Block_Q4_K implements Superblock {
/**
* Super block scale for quantized scales
*/
short d;
/**
* Super block scale for quantized mins
*/
short dmin;
/**
* Shales and mins, quantized with 6 bits
*/
char[] scales = new char[K_SCALE_SIZE];
/**
* 4-bit quants
*/
char[] qs = new char[QK_K/2];
/**
* Gets the size of the superblock
* @return The size of the superblock
*/
public int getSize(){
return 0;
}
}
/**
* 5-bit quantization
*/
public static class Block_Q5_K implements Superblock {
/**
* Super block scale for quantized scales
*/
short d;
/**
* Super block scale for quantized mins
*/
short dmin;
/**
* Scales and mins, quantized with 6 bits
*/
char[] scales = new char[K_SCALE_SIZE];
/**
* quants, high bit
*/
char[] qh = new char[QK_K/8];
/**
* quants, low 4 bits
*/
char[] qs = new char[QK_K/2];
/**
* Gets the size of the superblock
* @return The size of the superblock
*/
public int getSize(){
return 0;
}
}
/**
* 6-bit quantization
*/
public static class Block_Q6_K implements Superblock {
/**
* Quants, lower 4 bits
*/
char[] ql = new char[QK_K/2];
/**
* Quants, upper 2 bits
*/
char[] qh = new char[QK_K/4];
/**
* scales, quantized with 8 bits
*/
char[] scales = new char[QK_K/16];
/**
* super block scale
*/
short d;
/**
* Gets the size of the superblock
* @return The size of the superblock
*/
public int getSize(){
return QK_K/2 + QK_K/4 + QK_K/16 + 2;
}
}
/**
* 8-bit quantization
*/
public static class Block_Q8_K implements Superblock {
/**
* Delta
*/
float d;
/**
* quants
*/
char[] qs = new char[QK_K];
/**
* sum of the quants in groups of 16
*/
short[] bsums = new short[QK_K/16];
/**
* Gets the size of the superblock
* @return The size of the superblock
*/
public int getSize(){
return 0;
}
}
}

View File

@ -0,0 +1,14 @@
package org.studiorailgun.kobold.gguf.quant;
/**
 * Contract shared by every super block of quantized weights
 */
public interface Superblock {

    /**
     * Reports how large the super block is
     * @return The size of the superblock
     */
    int getSize();

}