MariaDB Integration with Graphbit¶
Overview¶
This guide explains how to use Graphbit to generate embeddings and perform similarity search in MariaDB using its native VECTOR type and vector functions. You can use both MariaDB's built-in vector search and manual similarity search in Python for debugging or prototyping.
Prerequisites¶
- MariaDB 11.4+ with native VECTOR support enabled.
- OpenAI API Key (or another supported embedding provider).
- Graphbit installed and configured (see installation guide).
- Python environment with `mariadb`, `graphbit`, and `numpy` installed.
- A MariaDB database and user with appropriate permissions.
Step 1: Connect to MariaDB¶
Establish a connection to your MariaDB instance:
import os

import mariadb

# Connection settings are read from the environment, falling back to
# local-development defaults; the port is fixed at 3306.
db_settings = {
    "user": os.getenv("DB_USER", "root"),
    "password": os.getenv("DB_PASSWORD", "12345"),
    "host": os.getenv("DB_HOST", "localhost"),
    "port": 3306,
    "database": os.getenv("DB_NAME", "vector_db"),
}
conn = mariadb.connect(**db_settings)
Step 2: Create Table¶
Create a table with native VECTOR support:
# Schema: auto-increment id, the source text, a 1536-dimension VECTOR
# column for the embedding, and an optional JSON metadata column.
create_table_sql = """
CREATE TABLE IF NOT EXISTS graphbit_vector (
id INT AUTO_INCREMENT PRIMARY KEY,
text TEXT NOT NULL,
embedding VECTOR(1536) NOT NULL,
metadata JSON
);
"""

cursor = conn.cursor()
cursor.execute(create_table_sql)
conn.commit()
Step 3: Generate Embeddings using Graphbit¶
Use Graphbit to generate embeddings for your texts:
from graphbit import EmbeddingClient, EmbeddingConfig

# Configure the OpenAI provider; the API key is taken from the environment.
openai_config = EmbeddingConfig.openai(
    model="text-embedding-3-small",
    api_key=os.getenv("OPENAI_API_KEY"),
)
embedding_client = EmbeddingClient(openai_config)

# Sample documents to index.
texts = [
    "GraphBit is a framework for LLM workflows and agent orchestration.",
    "MariaDB supports native vector search for efficient AI similarity queries.",
    "OpenAI offers tools for LLMs and embeddings.",
]

# Embed the whole list in a single call.
embeds = embedding_client.embed_many(texts)
Step 4: Insert Embeddings into MariaDB¶
Insert the generated embeddings into the MariaDB table:
import array
import json

insert_sql = "INSERT INTO graphbit_vector (text, embedding, metadata) VALUES (?, ?, ?)"
# Every row gets the same metadata, so serialize it once up front.
metadata_json = json.dumps({"source": "graphbit"})

for txt, vec in zip(texts, embeds):
    # Pack the embedding as C floats (array typecode "f") for the VECTOR column.
    packed_vec = array.array("f", vec).tobytes()
    cursor.execute(insert_sql, (txt, packed_vec, metadata_json))

conn.commit()
Step 5: Vector Search using SQL (Native MariaDB Vector Search)¶
Embed your query and search for similar vectors in MariaDB using SQL:
# Embed the query and pack it as C floats (array typecode "f").
query_vec = embedding_client.embed("What is GraphBit?")
query_vec_bytes = array.array("f", query_vec).tobytes()

# The query orders by cosine distance ascending and keeps the first two rows.
search_sql = """
SELECT id, text, VEC_DISTANCE_COSINE(embedding, ?) AS score
FROM graphbit_vector
ORDER BY score
LIMIT 2;
"""
cursor.execute(search_sql, (query_vec_bytes,))
rows = cursor.fetchall()

for row_id, row_text, distance in rows:
    # Similarity is reported as 1 - distance.
    print(f"ID: {row_id}\nSimilarity Score: {(1-distance):.4f}\nText: {row_text}\n---")
Step 6: Vector Search Example (Graphbit Manual Similarity)¶
You can also fetch all embeddings from MariaDB and perform similarity search in Python using Graphbit. This is useful for debugging or comparing results.
import ast

# Fetch every stored row and score it in Python against `query_vec`.
cursor.execute("SELECT id, text, embedding, metadata FROM graphbit_vector;")
all_rows = cursor.fetchall()

best_score = -1
best_item = None

for row_id, text, embedding_vec_raw, metadata in all_rows:
    # The embedding column may arrive as a textual list or as packed floats.
    if isinstance(embedding_vec_raw, str):
        embedding_vec = ast.literal_eval(embedding_vec_raw)
    elif isinstance(embedding_vec_raw, (bytes, bytearray)):
        decoded = array.array('f')
        decoded.frombytes(embedding_vec_raw)
        embedding_vec = decoded.tolist()
    else:
        # Without this branch the previous row's vector would be silently
        # reused (or a NameError raised on the first row).
        print(f"Skipping row {row_id} due to unsupported embedding type.")
        continue

    if len(embedding_vec) != len(query_vec):
        print(f"Skipping row {row_id} due to dimension mismatch.")
        continue

    score = embedding_client.similarity(query_vec, embedding_vec)
    print(f"Row ID: {row_id}, Score: {score:.4f}, Text: {text}")

    # Track the highest-scoring row so the search actually yields a result.
    if best_item is None or score > best_score:
        best_score = score
        best_item = (row_id, text, metadata)

if best_item is not None:
    best_id, best_text, _ = best_item
    print(f"Best match -> Row ID: {best_id}, Score: {best_score:.4f}, Text: {best_text}")
Full Example¶
import os
import array
import json
import numpy as np
import mariadb
from graphbit import EmbeddingClient, EmbeddingConfig
# Step 1: Connect to MariaDB
# Settings come from the environment, with local-development fallbacks.
conn = mariadb.connect(
    user=os.getenv("DB_USER", "root"),
    password=os.getenv("DB_PASSWORD", "12345"),
    host=os.getenv("DB_HOST", "localhost"),
    port=3306,
    database=os.getenv("DB_NAME", "vector_db"),
)
cursor = conn.cursor()

# Step 2: Create Table
cursor.execute("""
CREATE TABLE IF NOT EXISTS graphbit_vector (
id INT AUTO_INCREMENT PRIMARY KEY,
text TEXT NOT NULL,
embedding VECTOR(1536) NOT NULL,
metadata JSON
);
""")
conn.commit()

# Step 3: Generate Embeddings using Graphbit
embedding_client = EmbeddingClient(
    EmbeddingConfig.openai(
        model="text-embedding-3-small",
        api_key=os.getenv("OPENAI_API_KEY"),
    )
)
# Same sample texts as the step-by-step walkthrough.
texts = [
    "GraphBit is a framework for LLM workflows and agent orchestration.",
    "MariaDB supports native vector search for efficient AI similarity queries.",
    "OpenAI offers tools for LLMs and embeddings.",
]
embeds = embedding_client.embed_many(texts)

# Step 4: Insert Embeddings into MariaDB
for txt, vec in zip(texts, embeds):
    # Pack the embedding as C floats (array typecode 'f') for the VECTOR column.
    vec_bytes = array.array('f', vec).tobytes()
    cursor.execute(
        "INSERT INTO graphbit_vector (text, embedding, metadata) VALUES (?, ?, ?)",
        (txt, vec_bytes, json.dumps({"source": "graphbit"}))
    )
conn.commit()

# Step 5: SQL vector search
query_vec = embedding_client.embed("What is GraphBit?")
query_vec_bytes = array.array('f', query_vec).tobytes()
# Rows are ordered by cosine distance ascending; only the first two are kept.
cursor.execute("""
SELECT id, text, VEC_DISTANCE_COSINE(embedding, ?) AS score
FROM graphbit_vector
ORDER BY score
LIMIT 2;
""", (query_vec_bytes,))
rows = cursor.fetchall()
for id_, text, score in rows:
    # Similarity is reported as 1 - distance.
    print(f"[SQL] ID: {id_}\nSimilarity Score: {(1-score):.4f}\nText: {text}\n---")

# Step 6: Manual similarity search
cursor.execute("SELECT id, text, embedding FROM graphbit_vector")
rows = cursor.fetchall()
def decode_vector(vec_bytes):
    """Decode packed 32-bit floats into a one-dimensional NumPy array.

    Args:
        vec_bytes: Bytes-like object holding native-endian 32-bit floats.

    Returns:
        A writable ``float32`` array that owns its own data.

    Raises:
        ValueError: If the byte length is not a multiple of the float size.
    """
    # frombuffer yields a read-only view over the input; copy so the result
    # is independent of (and outlives) the source buffer.
    return np.frombuffer(vec_bytes, dtype=np.float32).copy()
# Split the fetched (id, text, embedding) rows into parallel lists,
# decoding each stored embedding into a NumPy array.
ids = [row[0] for row in rows]
texts = [row[1] for row in rows]
vectors = [decode_vector(row[2]) for row in rows]

# Reuse the query embedding already computed for the SQL search in Step 5
# instead of calling the embedding provider a second time.
query_vec = np.array(query_vec)
def cosine_similarity(a, b):
    """Return the cosine similarity of vectors *a* and *b*.

    Args:
        a: One-dimensional numeric array.
        b: One-dimensional numeric array of the same length as *a*.

    Returns:
        The dot product divided by the product of the Euclidean norms, or
        ``0.0`` when either vector has zero norm.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    # A zero-norm vector would otherwise produce NaN, which makes any
    # subsequent sort by score order-undefined.
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom
# Score every stored vector against the query, then order best-first.
scored_rows = [
    (row_id, row_text, cosine_similarity(query_vec, vec))
    for row_id, row_text, vec in zip(ids, texts, vectors)
]
scored_rows.sort(key=lambda item: item[2], reverse=True)

# Show the two highest-scoring rows.
for row_id, row_text, similarity in scored_rows[:2]:
    print(f"[Manual] ID: {row_id}\nScore: {similarity:.4f}\nText: {row_text}\n---")

# Closing Mariadb
cursor.close()
conn.close()
This integration enables you to leverage Graphbit's embedding capabilities with MariaDB's native vector support for scalable, production-grade semantic search and retrieval workflows.