Google Cloud Platform Integration with Graphbit¶
Overview¶
This guide explains how to connect Google Cloud Platform (GCP) services to Graphbit, with a focus on AlloyDB for vector storage. This integration enables you to leverage Google's enterprise-grade database within your Graphbit workflows, providing access to AlloyDB's PostgreSQL-compatible vector database with built-in pgvector support.
Prerequisites¶
- Google Cloud Project: Set up a project in Google Cloud Console.
- AlloyDB Instance: Create an AlloyDB instance in your Google Cloud project (see AlloyDB documentation).
- .env file in your project root with the following variables: ALLOYDB_DATABASE, ALLOYDB_USER, ALLOYDB_PASSWORD, ALLOYDB_HOST, ALLOYDB_PORT (optional, defaults to 5432), and OPENAI_API_KEY.
Step 1: AlloyDB Setup¶
Connect to AlloyDB and Ensure Table Exists¶
import psycopg2
import os

# Read the AlloyDB connection settings from the environment (.env file).
_connection_params = {
    "dbname": os.getenv("ALLOYDB_DATABASE"),
    "user": os.getenv("ALLOYDB_USER"),
    "password": os.getenv("ALLOYDB_PASSWORD"),
    "host": os.getenv("ALLOYDB_HOST"),
    "port": os.getenv("ALLOYDB_PORT", "5432"),
}

# Open the AlloyDB (PostgreSQL-compatible) connection and a cursor;
# both are reused by the later snippets in this guide.
conn = psycopg2.connect(**_connection_params)
cur = conn.cursor()

# Enable pgvector, then create the vector table (1536-dim embeddings plus
# JSONB metadata) and an IVFFlat cosine index — all idempotent via IF NOT EXISTS.
cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
cur.execute("""
CREATE TABLE IF NOT EXISTS alloydb_vectors (
id SERIAL PRIMARY KEY,
item_id TEXT,
embedding VECTOR(1536),
metadata JSONB
);
""")
cur.execute("""
CREATE INDEX IF NOT EXISTS idx_alloydb_embedding_vector ON alloydb_vectors USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
""")
conn.commit()
Step 2: Store and Search Vectors with OpenAI Embeddings¶
2.1. Generate and Store an Embedding¶
import json
from graphbit import EmbeddingClient, EmbeddingConfig

# Build a GraphBit embedding client backed by OpenAI's
# text-embedding-3-small model (1536 dimensions, matching the VECTOR(1536) column).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
embedding_config = EmbeddingConfig.openai(OPENAI_API_KEY, "text-embedding-3-small")
embedding_client = EmbeddingClient(embedding_config)

# Insert a single embedding.
doc_text = "This is a sample document for vector search."
embedding = embedding_client.embed(doc_text)

# NOTE: psycopg2 adapts a Python list to a PostgreSQL ARRAY literal, which
# cannot be stored in a pgvector VECTOR column. Serialize the embedding to
# pgvector's text form ("[0.1, 0.2, ...]") and cast it explicitly instead.
cur.execute(
"""
INSERT INTO alloydb_vectors (item_id, embedding, metadata)
VALUES (%s, %s::vector, %s)
""",
("item123", str(embedding), json.dumps({"category": "test"}))
)
conn.commit()
print("Inserted embedding for item123.")
2.2. Batch Embedding Example¶
# Embed several documents in one call — embed_many batches the texts and
# returns one embedding per input, in order.
batch_texts = [
    "Graph databases are great for relationships.",
    "Vector search enables semantic retrieval.",
    "OpenAI provides powerful embedding models.",
]
batch_embeddings = embedding_client.embed_many(batch_texts)
for idx, (text, emb) in enumerate(zip(batch_texts, batch_embeddings)):
    # As with the single insert, psycopg2 would adapt a Python list to a
    # PostgreSQL ARRAY; pass pgvector's text form with an explicit cast.
    cur.execute(
"""
INSERT INTO alloydb_vectors (item_id, embedding, metadata)
VALUES (%s, %s::vector, %s)
""",
(f"batch_{idx}", str(emb), json.dumps({"text": text}))
)
conn.commit()
print(f"Inserted {len(batch_texts)} documents with embeddings.")
Step 3: Vector Search Using GraphBit¶
import ast

# Embed the query with the same model used at insert time so the
# similarity comparison is meaningful.
query_text = "Find documents related to vector search."
query_embedding = embedding_client.embed(query_text)

# Fetch every stored vector and score it client-side with GraphBit.
# (For large tables, prefer a server-side ORDER BY with pgvector's
# distance operators so the IVFFlat index can be used.)
cur.execute("SELECT item_id, embedding, metadata FROM alloydb_vectors;")
all_rows = cur.fetchall()

# Cosine similarity ranges over [-1, 1]; start strictly below the minimum
# so a document scoring exactly -1 is still selected.
best_score = float("-inf")
best_item = None
for item_id, embedding_vec, metadata in all_rows:
    # pgvector returns the embedding as its text form, e.g. "[0.1, 0.2]";
    # parse it back into a Python list before scoring.
    if isinstance(embedding_vec, str):
        embedding_vec = ast.literal_eval(embedding_vec)
    score = embedding_client.similarity(query_embedding, embedding_vec)
    if score > best_score:
        best_score = score
        best_item = (item_id, metadata)

if best_item is not None:
    print(f"Most similar document: {best_item[0]} with score {best_score:.4f}")
else:
    print("No documents found in vector table.")
This integration enables you to leverage Google's enterprise-grade database and AI capabilities within your Graphbit workflows, providing access to AlloyDB's high-performance vector database with built-in pgvector support, powerful AI models, and scalable infrastructure for production AI applications.