# ggmlR

R bindings for the GGML tensor library, optimized for CPU computation. The package provides low-level tensor operations for machine learning and is particularly useful for LLM inference and other deep-learning tasks on the CPU.

## Installation
```r
# From source
install.packages("ggmlR_0.1.0.tar.gz", repos = NULL, type = "source")

# Or using devtools
devtools::install_github("Zabis13/ggmlR")
```

## Quick Start

```r
library(ggmlR)

# Initialize context
ctx <- ggml_init(16 * 1024 * 1024) # 16MB
# Create tensors
a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 10)
b <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 10)
# Set data
ggml_set_f32(a, rnorm(10))
ggml_set_f32(b, rnorm(10))
# Perform operations
c <- ggml_add(ctx, a, b)
# Compute
graph <- ggml_build_forward_expand(ctx, c)
ggml_graph_compute(ctx, graph)
# Get results
result <- ggml_get_f32(c)
# Cleanup
ggml_free(ctx)
```
## Matrix Multiplication

```r
ctx <- ggml_init(16 * 1024 * 1024)

# Create matrices
A <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 100, 200) # 100x200
B <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 200, 50) # 200x50
# Initialize with random data
ggml_set_f32(A, rnorm(100 * 200))
ggml_set_f32(B, rnorm(200 * 50))
# Matrix multiplication: C = A * B (100x50)
C <- ggml_mul_mat(ctx, A, B)
# Compute
graph <- ggml_build_forward_expand(ctx, C)
ggml_graph_compute(ctx, graph)
result <- ggml_get_f32(C)
ggml_free(ctx)
```
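Assuming `ggml_get_f32()` returns the tensor data as a flat numeric vector, as the 1-D examples suggest, the 100x50 product can be reshaped into an R matrix. A minimal sketch, not confirmed binding behavior:

```r
# Hypothetical post-processing: reshape the flat result into a 100x50 R matrix.
# Both the flat return value and the column-major layout are assumptions here.
C_mat <- matrix(result, nrow = 100, ncol = 50)
```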
## Neural Network Layer

```r
ctx <- ggml_init(128 * 1024 * 1024) # 128MB

# Input
input <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 128)
ggml_set_f32(input, rnorm(128))
# Weights and bias
W <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 128, 256)
b <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 256)
ggml_set_f32(W, rnorm(128 * 256, sd = 0.01))
ggml_set_f32(b, rep(0, 256))
# Forward: GELU(W * input + b)
h <- ggml_mul_mat(ctx, W, input)
h <- ggml_add(ctx, h, b)
output <- ggml_gelu(ctx, h)
# Compute
graph <- ggml_build_forward_expand(ctx, output)
ggml_graph_compute(ctx, graph)
result <- ggml_get_f32(output)
ggml_free(ctx)
```

## API Reference

### Tensor Creation

- `ggml_new_tensor_1d()` - 1D tensor (vector)
- `ggml_new_tensor_2d()` - 2D tensor (matrix)
- `ggml_new_tensor_3d()` - 3D tensor
- `ggml_new_tensor_4d()` - 4D tensor

### Data Types

- `GGML_TYPE_F32` - 32-bit float
- `GGML_TYPE_F16` - 16-bit float
- `GGML_TYPE_Q4_0`, `GGML_TYPE_Q4_1` - 4-bit quantized
- `GGML_TYPE_Q5_0`, `GGML_TYPE_Q5_1` - 5-bit quantized
- `GGML_TYPE_Q8_0`, `GGML_TYPE_Q8_1` - 8-bit quantized
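The 1-D and 2-D constructors appear in the examples above; by the same pattern, the 3-D and 4-D variants should take the context, a type, and then one size per dimension. A minimal sketch (the 3-D/4-D argument order is an assumption extrapolated from the 1-D/2-D calls):

```r
ctx <- ggml_init(16 * 1024 * 1024)

v   <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8)           # vector of length 8
m   <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4)        # 8x4 matrix
t3  <- ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 8, 4, 2)     # assumed: dims follow the same order
t4  <- ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 8, 4, 2, 3)  # assumed: one extra size per dimension
t16 <- ggml_new_tensor_1d(ctx, GGML_TYPE_F16, 8)           # half-precision storage

ggml_free(ctx)
```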
### Operations

- `ggml_mul_mat()` - Matrix multiplication
- `ggml_add()` - Element-wise addition
- `ggml_mul()` - Element-wise multiplication
- `ggml_relu()` - ReLU activation
- `ggml_gelu()` - GELU activation
- `ggml_silu()` - SiLU/Swish activation
- `ggml_norm()` - Layer normalization
- `ggml_rms_norm()` - RMS normalization

## Use Cases

### LLM Inference

This package is designed for running language-model inference on CPU:

- Load quantized model weights
- Build transformer layers
- Run token-by-token generation
- Efficient memory usage with quantization
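As an illustration of "build transformer layers", here is a sketch of a SwiGLU-style feed-forward block composed only from the operations listed above. The dimension choices are arbitrary, and `ggml_rms_norm()` is assumed to take `(ctx, tensor)` like the other unary ops:

```r
ctx <- ggml_init(64 * 1024 * 1024)

# Token activation and feed-forward weights (shapes follow the layer example above)
x  <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 128)
W1 <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 128, 512)  # up projection
W2 <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 128, 512)  # gate projection
W3 <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 512, 128)  # down projection

ggml_set_f32(x, rnorm(128))
ggml_set_f32(W1, rnorm(128 * 512, sd = 0.01))
ggml_set_f32(W2, rnorm(128 * 512, sd = 0.01))
ggml_set_f32(W3, rnorm(512 * 128, sd = 0.01))

# SwiGLU feed-forward with pre-norm: y = W3 * (SiLU(W2 * h) * (W1 * h))
h    <- ggml_rms_norm(ctx, x)                   # assumed (ctx, tensor) signature
up   <- ggml_mul_mat(ctx, W1, h)
gate <- ggml_silu(ctx, ggml_mul_mat(ctx, W2, h))
y    <- ggml_mul_mat(ctx, W3, ggml_mul(ctx, gate, up))

graph <- ggml_build_forward_expand(ctx, y)
ggml_graph_compute(ctx, graph)
result <- ggml_get_f32(y)

ggml_free(ctx)
```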
### Diffusion Models

Can be used for diffusion-model inference:

- U-Net architecture building blocks
- Attention mechanisms
- Residual connections
- Normalization layers
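The residual-connection and normalization items map directly onto the listed ops. A minimal sketch, again assuming the unary `(ctx, tensor)` form for `ggml_norm()`:

```r
ctx <- ggml_init(16 * 1024 * 1024)

x <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64)
ggml_set_f32(x, rnorm(64))

# Pre-norm residual block: y = x + ReLU(norm(x))
h <- ggml_norm(ctx, x)       # assumed (ctx, tensor) signature
h <- ggml_relu(ctx, h)
y <- ggml_add(ctx, h, x)     # residual connection

graph <- ggml_build_forward_expand(ctx, y)
ggml_graph_compute(ctx, graph)
result <- ggml_get_f32(y)

ggml_free(ctx)
```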
## Performance

Optimized for x86-64 CPUs with:

- SIMD vectorization
- Multi-threading support
- Efficient memory layout
- Cache-friendly operations
## License

MIT License
## Citation

If you use this package in your research, please cite:
```bibtex
@software{ggmlR,
  author = {Yuri Baramykov},
  title  = {ggmlR: CPU Tensor Operations for R},
  year   = {2026},
  url    = {https://github.com/Zabis13/ggmlR}
}
```