Skip to content

DuckPGQ

DuckPGQ is a DuckDB community extension for graph workloads that supports the SQL/PGQ standard.

Leveraging the power of DuckDB to bring high-performance, SQL-based graph query capabilities directly to your analytical workflows.

Install and Load DuckPGQ

INSTALL duckpgq FROM community; 
LOAD duckpgq; 
import duckdb
conn = duckdb.connect()
conn.install_extension("duckpgq", repository="community")
conn.load_extension("duckpgq")
import { DuckDBInstance } from '@duckdb/node-api';
const instance = await DuckDBInstance.create();
const connection = await instance.connect();
await connection.run("INSTALL duckpgq FROM community");
await connection.run("LOAD duckpgq");
library(duckdb)
con <- dbConnect(duckdb())
dbExecute(con, "INSTALL duckpgq FROM community")
dbExecute(con, "LOAD duckpgq")
Connection conn = DriverManager.getConnection("jdbc:duckdb:");
Statement st = conn.createStatement();
st.execute("INSTALL duckpgq FROM community");
st.execute("LOAD duckpgq");

Key Features

  • SQL/PGQ Standard
    Supports graph pattern matching with SQL/PGQ, following the SQL:2023 standard.
  • High Performance
    Leverages DuckDB for efficient, in-process graph analytics.
  • Easy To Install
    Seamlessly integrate with DuckDB through a simple setup process, requiring no dependencies.
  • Open Source
    Community-driven, extensible, and free to use.

Explore DuckPGQ Across Domains

Setup
ATTACH 'https://github.com/Dtenwolde/duckpgq-docs/raw/refs/heads/main/datasets/snb.duckdb';

use snb;
install duckpgq from community; 
load duckpgq;

CREATE or replace PROPERTY GRAPH snb
VERTEX TABLES (
  Person, Forum
)
EDGE TABLES (
  Person_knows_person     SOURCE KEY (Person1Id) REFERENCES Person (id)
                          DESTINATION KEY (Person2Id) REFERENCES Person (id)
                          LABEL knows,
  Forum_hasMember_Person  SOURCE KEY (ForumId) REFERENCES Forum (id)
                          DESTINATION KEY (PersonId) REFERENCES Person (id)
                          LABEL hasMember
);
-- find the shortest path from one person to all other persons
FROM GRAPH_TABLE (snb
  MATCH p = ANY SHORTEST (p1:person WHERE p1.id = 14)-[k:knows]->*(p2:person)
  COLUMNS (p1.id, p2.id as other_person_id, element_id(p), path_length(p))
);
-- Find mutual friends between two users
FROM GRAPH_TABLE (snb
  MATCH (p1:Person WHERE p1.id = 16)-[k:knows]->(p2:Person)<-[k2:knows]-(p3:Person WHERE p3.id = 32)
  COLUMNS (p2.firstName)
);
-- Find the 3 most popular people 
FROM GRAPH_TABLE (snb
  MATCH (follower:Person)-[follows:knows]->(person:Person)
  COLUMNS (person.id AS personID, person.firstname, person.lastname, follower.id AS followerID)
)
SELECT personID, firstname, lastname, COUNT(followerID) AS numFollowers
GROUP BY ALL 
ORDER BY numFollowers DESC 
LIMIT 3;
-- Number of forums posted on by the most followed person
WITH
mfp AS (
  FROM GRAPH_TABLE (snb
    MATCH (follower:Person)-[follows:knows]->(person:Person)
    COLUMNS (person.id AS personID, person.firstname, follower.id AS followerID)
  )
SELECT personID, firstname, COUNT(followerID) AS numFollowers
GROUP BY ALL ORDER BY numFollowers DESC LIMIT 1
)
FROM
  mfp,
  GRAPH_TABLE (snb
    MATCH (person:Person)<-[fhm:hasMember]-(f:Forum)
    COLUMNS (person.id AS personID, f.id as forumId)
) mem
SELECT mfp.personID, mfp.firstname, mfp.numFollowers, count(mem.forumId) forumCount
WHERE mfp.personID = mem.personID
GROUP BY ALL;
Setup
ATTACH 'https://github.com/Dtenwolde/duckpgq-docs/raw/refs/heads/airline-data/datasets/airline-data-small.duckdb' as airline;

use airline;
install duckpgq from community; 
load duckpgq; 

CREATE PROPERTY GRAPH flight_graph
  VERTEX TABLES (
    aircrafts_data, airports_data,
    bookings, flights,
    tickets, seats
  )
  EDGE TABLES (
    route
      SOURCE KEY (departure_airport) REFERENCES airports_data(airport_code)
      DESTINATION KEY (arrival_airport) REFERENCES airports_data(airport_code),
    ticket_flights
      SOURCE KEY (ticket_no) REFERENCES tickets(ticket_no)
      DESTINATION KEY (flight_id) REFERENCES flights(flight_id),
    bookings_tickets
      SOURCE KEY (book_ref) REFERENCES bookings(book_ref)
      DESTINATION KEY (ticket_no) REFERENCES tickets(ticket_no),
    boarding_passes 
      SOURCE KEY (ticket_no) REFERENCES tickets(ticket_no)
      DESTINATION KEY (seat_no) REFERENCES seats(seat_no)
);
FROM (
  SELECT unnest(flights) AS flights 
    FROM GRAPH_TABLE (
    flight_graph 
    MATCH o = ANY SHORTEST (a:airports_data WHERE a.airport_code = 'UKX')
      -[fr:route]->*
      (a2:airports_data WHERE a2.airport_code = 'CNN') 
    COLUMNS (edges(o) AS flights)
  )
) 
JOIN route f 
  ON f.rowid = flights;
FROM GRAPH_TABLE (
  flight_graph 
  MATCH (b:bookings)-[bt:bookings_tickets]->(t:tickets)-[bp:boarding_passes]->(s:seats)
)
SELECT round(avg(total_amount), 2) avg_amount, seat_no 
GROUP BY seat_no 
ORDER BY avg_amount DESC;
Setup
ATTACH 'https://github.com/Dtenwolde/duckpgq-docs/raw/refs/heads/main/datasets/finbench.duckdb' AS finbench;
use finbench;
INSTALL duckpgq FROM community;
LOAD duckpgq; 

CREATE OR REPLACE PROPERTY GRAPH finbench
VERTEX TABLES (
  Account, Company, Loan, 
  Medium, Person
)
EDGE TABLES (
  AccountRepayLoan        SOURCE KEY (accountId) REFERENCES Account (accountId)
                          DESTINATION KEY (loanId) REFERENCES Loan (loanId) 
                          LABEL repay,
  AccountTransferAccount  SOURCE KEY (fromId) REFERENCES Account (accountId)
                          DESTINATION KEY (toId) REFERENCES Account (AccountId) 
                          LABEL transfer,
  AccountWithdrawAccount  SOURCE KEY (fromId) REFERENCES Account (accountId)
                          DESTINATION KEY (toId) REFERENCES Account (AccountId) 
                          LABEL withdraw,
  CompanyApplyLoan        SOURCE KEY (companyId) REFERENCES Company (companyId)
                          DESTINATION KEY (loanId) REFERENCES Loan (loanId) 
                          LABEL companyApply,
  CompanyGuaranteeCompany SOURCE KEY (fromId) REFERENCES Company (companyId)
                          DESTINATION KEY (toId) REFERENCES Company (companyId) 
                          LABEL companyGuarantee,
  CompanyInvestCompany    SOURCE KEY (investorId) REFERENCES Company (companyId)
                          DESTINATION KEY (companyId) REFERENCES Company (companyId) 
                          LABEL companyInvest,
  CompanyOwnAccount       SOURCE KEY (companyId) REFERENCES Company (companyId)
                          DESTINATION KEY (accountId) REFERENCES Account (accountId) 
                          LABEL companyOwn,
  LoanDepositAccount      SOURCE KEY (loanId) REFERENCES Loan (loanId)
                          DESTINATION KEY (accountId) REFERENCES Account (accountId) 
                          LABEL deposit,
  MediumSignInAccount     SOURCE KEY (mediumId) REFERENCES Medium (mediumId)
                          DESTINATION KEY (accountId) REFERENCES Account (accountId) 
                          LABEL signIn,
  PersonApplyLoan         SOURCE KEY (personId) REFERENCES Person (personId)
                          DESTINATION KEY (loanId) REFERENCES Loan (loanId) 
                          LABEL personApply,
  PersonGuaranteePerson   SOURCE KEY (fromId) REFERENCES Person (personId)
                          DESTINATION KEY (toId) REFERENCES Person (personId) 
                          LABEL personGuarantee,
  PersonInvestCompany     SOURCE KEY (investorId) REFERENCES Person (personId)
                          DESTINATION KEY (companyId) REFERENCES Company (companyId) 
                          LABEL personInvest,
  PersonOwnAccount        SOURCE KEY (personId) REFERENCES Person (personId)
                          DESTINATION KEY (accountId) REFERENCES Account (accountId) 
                          LABEL personOwn
);
FROM GRAPH_TABLE (
  finbench 
  MATCH (src:Account where src.accountId = 16607023625929101)
    <-[e1:transfer]-(mid:Account)
    -[e2:transfer]->(dst:Account where dst.isBlocked = true) 
  COLUMNS (src.accountId as src_id, dst.accountId as dst_id)
) 
SELECT src_id, dst_id
WHERE src_Id <> dst_id; 
FROM GRAPH_TABLE (
  finbench 
  MATCH (src:Account)-[e:Transfer]->(dst:Account) 
  WHERE '2022-07-13 09:18:33.137' < e.createtime 
    AND e.createtime < '2022-09-03 02:31:47.812' 
    AND e.amount > 4829783
  );

Behind DuckPGQ

Daniel ten Wolde

Daniël ten Wolde

Lead Developer of DuckPGQ and PhD student at CWI specializing in graph analytics and database systems.

WIP Disclaimer

DuckPGQ is currently a research project and still a work in progress. While we encourage you to explore and experiment with it, please be aware that there may be bugs, incomplete features, or unexpected behaviour.

We greatly appreciate any feedback or bug reports that help us improve and evolve the extension. Feel free to share your experiences!