OraMatt: YABAOracle

Yet Another Blog About Oracle

Searching JSON Data in a Relational Table in Oracle Database 26ai

A practical harness for generating high-volume data, querying JSON, and comparing index strategies in Oracle.

Modern applications increasingly store semi-structured data alongside relational data. One of the strengths of Oracle Database 26ai is the ability to natively store JSON in relational tables while still leveraging full SQL semantics, indexing, and performance tuning.

  • Create relational tables that include JSON columns
  • Generate large volumes of test data
  • Compare multiple JSON search techniques
  • Benchmark different indexing strategies

1. Relational Schema with JSON Column

We’ll create two tables:

  • WAREHOUSES – relational table with spatial location
  • TRANSACTIONS – relational metadata + JSON history snapshot
/* Parent table: one row per physical warehouse, with an SDO_GEOMETRY
   point (longitude/latitude) usable for spatial queries. */
create table warehouses
(
    whse_sysid     number generated by default as identity,
    warehouse_name varchar2(100),
    warehouse_loc  sdo_geometry, /* store lat/long */
    constraint pk_warehouses primary key (whse_sysid)
);

/* Child table: relational audit metadata plus a flexible JSON
   event snapshot in HSTRY_SNPSHT. */
create table transactions
(
    trnsct_hstry_sysid number generated by default as identity,
    activity_group     varchar2(100),
    whse_sysid         number,
    activity           varchar2(100),
    create_ts          timestamp default systimestamp,
    create_userid      varchar2(100),
    hstry_snpsht       json,
    persistence_key    varchar2(1000),
    constraint pk_transactions primary key (trnsct_hstry_sysid),
    constraint fk_transactions_whse
        foreign key (whse_sysid) references warehouses (whse_sysid)
);

Why this model works well:

You get relational integrity (PK/FK, joins) while still capturing flexible event payloads in JSON, without forcing a rigid relational schema for every attribute.

2. High-Volume Data Generator

To meaningfully compare query patterns and indexing strategies, you need volume. The following PL/SQL procedure optionally generates warehouses, then generates N transactions per warehouse, storing a structured JSON payload in hstry_snpsht.

create or replace procedure gen_whse_txns (
    p_num_warehouses in number default null, -- warehouses to create first; null or 0 = reuse existing rows
    p_txns_per_whse  in number               -- transactions to generate per warehouse; must be >= 0
) as
    l_num_warehouses  number := nvl(p_num_warehouses, 0);
    l_lat             number;
    l_lon             number;
    l_activity_group  varchar2(100);
    l_activity        varchar2(100);
    l_userid          varchar2(100);
    l_notes           varchar2(100);
    l_source          varchar2(100);
    l_persistence_key varchar2(1000);
    -- Randomized event time, computed once per row so the relational
    -- CREATE_TS column and the JSON "ts" field always agree.
    l_create_ts       timestamp(6) with time zone;

    -- Return one random element from the supplied list.
    function pick(p_list sys.odcivarchar2list) return varchar2 is
    begin
        -- dbms_random.value(low, high) is [low, high), so trunc() yields 1 .. count
        return p_list(trunc(dbms_random.value(1, p_list.count + 1)));
    end;
begin
    if p_txns_per_whse is null or p_txns_per_whse < 0 then
        raise_application_error(-20001, 'p_txns_per_whse must be >= 0');
    end if;

    -- Seed from the clock so repeated runs produce different data.
    dbms_random.seed(to_char(systimestamp,'YYYYMMDDHH24MISSFF'));

    -- 1) Optionally insert warehouses (random points roughly inside the continental US)
    if l_num_warehouses > 0 then
        for i in 1 .. l_num_warehouses loop
            l_lat := round(dbms_random.value(25, 49), 6);
            l_lon := round(dbms_random.value(-124, -66), 6);
            insert into warehouses (warehouse_name, warehouse_loc)
            values (
                'Warehouse ' || to_char(i, 'FM000') || ' ' || substr(rawtohex(sys_guid()), 1, 6),
                sdo_geometry(
                    2001,                               -- 2-D point geometry
                    4326,                               -- WGS 84 lat/long SRID
                    sdo_point_type(l_lon, l_lat, null),
                    null,
                    null
                )
            );
        end loop;
        commit;
    end if;

    -- 2) Ensure we have warehouses to attach transactions to
    if p_txns_per_whse > 0 then
        declare
            l_cnt number;
        begin
            select count(*) into l_cnt from warehouses;
            if l_cnt = 0 then
                raise_application_error(-20002, 'No rows in WAREHOUSES. Insert warehouses or pass p_num_warehouses > 0.');
            end if;
        end;
    end if;

    -- 3) Insert transactions for each warehouse
    for wh in (select whse_sysid, warehouse_name from warehouses) loop
        for j in 1 .. p_txns_per_whse loop
            l_activity_group := pick(sys.odcivarchar2list('RECEIVING','PUTAWAY','PICK','PACK','SHIP','CYCLE_COUNT','ADJUSTMENT'));
            l_activity       := pick(sys.odcivarchar2list('CREATE','UPDATE','CLOSE','CANCEL','MOVE','AUDIT'));
            l_userid         := pick(sys.odcivarchar2list('svc_wms','svc_integration','jdoe','asmith','mchan','op_user1','op_user2'));
            l_notes          := pick(sys.odcivarchar2list('ok','reprocessed','manual override','system generated','validated'));
            l_source         := pick(sys.odcivarchar2list('WMS','ERP','OMS','INTEGRATION'));

            -- Random event time within the last 30 days. Previously the JSON
            -- "ts" field used the *current* systimestamp while CREATE_TS got a
            -- randomized past value, so the snapshot never matched the row.
            l_create_ts := systimestamp - numtodsinterval(trunc(dbms_random.value(0, 30*24*60)), 'minute');

            l_persistence_key :=
                'WHSE=' || wh.whse_sysid ||
                ';GROUP=' || l_activity_group ||
                ';ACT=' || l_activity ||
                ';SEQ=' || j;

            insert into transactions
                (activity_group, whse_sysid, activity, create_ts, create_userid, hstry_snpsht, persistence_key)
            values
            (
                l_activity_group,
                wh.whse_sysid,
                l_activity,
                l_create_ts,
                l_userid,
                json_object(
                    'warehouseId'   value wh.whse_sysid,
                    'warehouseName' value wh.warehouse_name,
                    'event'         value l_activity,
                    'group'         value l_activity_group,
                    'notes'         value l_notes,
                    'qty'           value trunc(dbms_random.value(1, 500)),
                    'source'        value l_source,
                    'ts'            value to_char(l_create_ts, 'YYYY-MM-DD"T"HH24:MI:SS.FF3TZH:TZM')
                ),
                l_persistence_key
            );
        end loop;
        commit; -- commit per warehouse to keep undo/redo per transaction bounded
    end loop;
end;
/

Example executions:

/* Populate warehouses + transactions: 25 warehouses x 100,000 txns = 2.5M rows */
exec gen_whse_txns(25, 100000);
/* Populate transactions only: 100,000 more txns for EVERY existing warehouse */
exec gen_whse_txns(0, 100000);

3. Searching JSON: Three Approaches

Approach 1: REGEX Search on Serialized JSON (Not Recommended)

/* Regex scan over the serialized JSON text: flexible, but treats the
   document as a flat string, so no JSON-aware index can help. */
select txn.trnsct_hstry_sysid,
       txn.whse_sysid,
       txn.create_ts,
       txn.hstry_snpsht
from   transactions txn
where  regexp_like(
           json_serialize(txn.hstry_snpsht returning varchar2(4000)),
           '"notes"\s*:\s*".*override.*"',
           'i'
       );

This treats JSON as text. It is flexible, but typically prevents index usage and often forces scans at scale.

Approach 2: LIKE / Wildcard Search on Serialized JSON

/* Case-insensitive wildcard match against the serialized document. */
select txn.trnsct_hstry_sysid,
       txn.whse_sysid,
       txn.create_ts
from   transactions txn
where  lower(json_serialize(txn.hstry_snpsht returning varchar2(4000)))
       like '%"source":"erp"%';

/* Same predicate, wrapped in a CTE purely to count the hits. */
with erp_hits as
(
    select txn.trnsct_hstry_sysid,
           txn.whse_sysid,
           txn.create_ts
    from   transactions txn
    where  lower(json_serialize(txn.hstry_snpsht returning varchar2(4000)))
           like '%"source":"erp"%'
)
select count(1) from erp_hits;

/* Substring match anywhere in the document (case-sensitive). */
select txn.trnsct_hstry_sysid,
       txn.whse_sysid
from   transactions txn
where  json_serialize(txn.hstry_snpsht returning varchar2(4000))
       like '%Warehouse 005%';

Approach 3: SQL/JSON Paths (Preferred)

/* Exact match on a field using JSON_VALUE (string comparison, case-sensitive) */
select t.trnsct_hstry_sysid, t.whse_sysid, t.create_ts
from transactions t
where json_value(t.hstry_snpsht, '$.source') = 'ERP';
/* Wildcard on extracted scalar -- note lower()/leading-% still force per-row
   evaluation unless a matching function-based index exists */
select t.trnsct_hstry_sysid, t.whse_sysid, t.create_ts
from transactions t
where lower(json_value(t.hstry_snpsht, '$.notes')) like '%override%';
/* Numeric comparison: RETURNING NUMBER compares as a number, not as text */
select t.trnsct_hstry_sysid, t.whse_sysid
from transactions t
where json_value(t.hstry_snpsht, '$.qty' returning number) >= 300;
/* Projection: JSON fields surfaced as relational columns */
select
t.trnsct_hstry_sysid,
t.whse_sysid,
json_value(t.hstry_snpsht, '$.warehouseName') as warehouse_name,
json_value(t.hstry_snpsht, '$.event') as event,
json_value(t.hstry_snpsht, '$.group') as activity_group,
json_value(t.hstry_snpsht, '$.source') as source,
json_value(t.hstry_snpsht, '$.qty' returning number) as qty
from transactions t
where json_value(t.hstry_snpsht, '$.event') in ('CREATE','CLOSE');

Recommendation:

Prefer SQL/JSON operators such as JSON_VALUE, JSON_EXISTS, and JSON_QUERY. They preserve type semantics, enable indexing, and give the optimizer the information it needs.

4. Capturing Execution Plans

Establish a baseline before indexing. Enable detailed statistics and capture actual execution metrics:

	-- sqlmon_run_and_spool.sql
	-- Headless-friendly SQLcl script: runs a tagged query, finds its SQL_ID,
	-- spools the SQL Monitor HTML report, and exits.

	-- Fail fast in headless mode
	whenever oserror exit 9
	whenever sqlerror exit sql.sqlcode

	-- Conservative output settings for clean spooling
	set echo off
	set termout on
	set verify off
	set feedback on
	set pagesize 0
	set linesize 32767
	set trimspool on
	set serveroutput off
	set heading off
	set tab off

	-- Needed to spool CLOB HTML cleanly
	set long 10000000
	set longchunksize 1000000

	-- This script depends on &-substitution (&run_ts, &tag, &sql_id), so DEFINE
	-- must stay ON. If your monitored SQL contains a literal '&', escape it
	-- (SET ESCAPE) rather than disabling DEFINE.
	set define on

	-- 1) Timestamp for unique tag + filename
	column run_ts new_value run_ts noprint
	select to_char(systimestamp, 'YYYYMMDD_HH24MISSFF3') as run_ts from dual;

	define tag = SQLMON_&run_ts

	prompt
	prompt ==== Running SQL Monitor capture for tag: &tag ====
	prompt

	-- Optional: request "all executions" monitoring for the session (if you have privilege)
	-- This is not required, but can help in some environments.
	-- alter session set statistics_level = all;

	-- 2) Run your query (tagged). Keep the tag EXACTLY as below so we can find it in V$SQL.
	--    Add /*+ MONITOR */ to encourage SQL Monitor capture.
	--
	-- >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
	select /*+ monitor */ /* tag:&tag */
	       t.trnsct_hstry_sysid, t.whse_sysid, t.create_ts
	from   transactions t
	where  json_value(t.hstry_snpsht, '$.source') = 'BALT';
	-- <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

	-- 3) Locate SQL_ID for the tagged SQL text.
	-- In headless runs, V$SQL may lag briefly; retry a few times.

	variable v_sql_id varchar2(13)

	begin
	  :v_sql_id := null;

	  for i in 1 .. 10 loop
	    begin
	      select sql_id
	      into   :v_sql_id
	      from (
	        select sql_id
	        from   v$sql
	        where  sql_text like '%tag:&tag%'
	        -- exclude this lookup cursor itself: after substitution its own
	        -- text also contains the literal tag
	        and    upper(sql_text) not like '%V$SQL%'
	        order  by last_active_time desc
	      )
	      where rownum = 1;

	      exit when :v_sql_id is not null;
	    exception
	      when no_data_found then
	        null;
	    end;

	    -- DBMS_SESSION.SLEEP needs no extra grant (DBMS_LOCK.SLEEP does)
	    dbms_session.sleep(1);
	  end loop;

	  if :v_sql_id is null then
	    raise_application_error(-20010, 'Could not find SQL_ID in V$SQL for tag: &tag');
	  end if;
	end;
	/

	-- Move bind to substitution variable for later use
	column sql_id_col new_value sql_id noprint
	select :v_sql_id as sql_id_col from dual;

	prompt
	prompt Found SQL_ID: &sql_id
	prompt Generating SQL Monitor HTML report...
	prompt

	-- 4) Spool HTML report locally (to current SQLcl working directory)
	spool sqlmon_&tag._&sql_id..html

	-- Use active report if still present; if the statement finished quickly, this still typically works.
	-- If your system requires, you can also pass SQL_EXEC_ID/SQL_EXEC_START (not included here).
	select dbms_sqltune.report_sql_monitor(
	         sql_id       => '&sql_id',
	         type         => 'HTML',
	         report_level => 'ALL'
	       )
	from dual;

	spool off

	prompt
	prompt Wrote report: sqlmon_&tag._&sql_id..html
	prompt

5. Indexing Strategies for JSON

Strategy 1: Function-Based B-tree Indexes

Best for equality and range queries on known keys.
Equality on a JSON scalar (source)

/* Function-based index on the extracted "source" scalar.
   NOTE(review): the query's JSON_VALUE expression must match the index
   expression (including the RETURNING / ON ERROR / ON EMPTY clauses) for the
   optimizer to consider this index -- confirm against the JSON indexing docs. */
create index txns_ix_source
on transactions (
json_value(hstry_snpsht, '$.source' returning varchar2(30) error on error null on empty)
);
/* Hinted probe: equality on exactly the indexed expression */
select /*+ index(t txns_ix_source) */ *
from transactions t
where json_value(t.hstry_snpsht, '$.source' returning varchar2(30) error on error null on empty) = 'ERP';

Numeric range (qty)

/* Function-based index supporting numeric range predicates on "qty". */
create index txns_ix_qty
    on transactions (
        json_value(hstry_snpsht, '$.qty' returning number error on error null on empty)
    );

/* Range probe using the same expression as the index definition. */
select t.*
from   transactions t
where  json_value(t.hstry_snpsht, '$.qty' returning number error on error null on empty) >= 300;

Composite index (warehouse + source)

/* Composite index: leading relational column (WHSE_SYSID) narrows by
   warehouse, then the extracted JSON "source" refines -- suited to queries
   that filter on both columns. */
create index txns_ix_whse_source
on transactions (
whse_sysid,
json_value(hstry_snpsht, '$.source' returning varchar2(30) error on error null on empty)
);

Strategy 2: Virtual Columns + Normal Indexes

Promote frequently-used JSON keys into virtual columns, then index like any relational column.

/* Promote frequently-queried JSON keys to virtual columns.
   NULL ON ERROR yields NULL (rather than an error) for rows whose JSON
   lacks the key or has an incompatible value. */
alter table transactions add (
    j_source varchar2(30)
        generated always as (json_value(hstry_snpsht, '$.source' returning varchar2(30) null on error)) virtual,
    j_qty number
        generated always as (json_value(hstry_snpsht, '$.qty' returning number null on error)) virtual,
    j_notes varchar2(200)
        generated always as (json_value(hstry_snpsht, '$.notes' returning varchar2(200) null on error)) virtual
);

/* Index the virtual columns exactly like ordinary relational columns. */
create index txns_ix_j_source     on transactions (j_source);
create index txns_ix_j_qty        on transactions (j_qty);
create index txns_ix_whse_jsource on transactions (whse_sysid, j_source);

-- standard SQL queries against the promoted columns
select /*+ INDEX(TRANSACTIONS txns_ix_j_source) */ *
from   transactions
where  j_source = 'BALT';

select *
from   transactions
where  j_qty = 1000;

select *
from   transactions
where  whse_sysid = 1
and    j_source = 'BALT';

Strategy 3: JSON Search Index

Best for flexible, schema-optional search and “find it anywhere” patterns.

/* JSON search index: supports ad hoc full-text and flexible path queries
   over the whole document. */
create search index txns_jsi on transactions (hstry_snpsht) for json;
/* Full-text style: does the JSON contain this token anywhere?
   (JSON_TEXTCONTAINS requires the search index) */
select t.trnsct_hstry_sysid, t.whse_sysid, t.create_ts
from transactions t
where json_textcontains(t.hstry_snpsht, '$', 'matt');
/* Path filter: top-level "source" equals 'ERP' */
select *
from TRANSACTIONS t
where json_exists(t.hstry_snpsht, '$?(@.source == "ERP")');
/* Count hits */
with countThis as
(
select *
from TRANSACTIONS t
where json_exists(t.hstry_snpsht, '$?(@.source == "ERP")')
)
select count(1) from countThis;
/* Combined scalar predicates: string equality plus numeric range */
select *
from TRANSACTIONS t
where json_value(t.hstry_snpsht, '$.source') = 'ERP'
and json_value(t.hstry_snpsht, '$.qty' returning number) >= 300;
/* NOTE(review): '$?(@ == "matt")' applies the filter to the ROOT item ($),
   which here is an object, so it may never match; to test each field value a
   path like '$.*?(@ == "matt")' is probably intended -- verify against the
   SQL/JSON path documentation. */
select t.trnsct_hstry_sysid, t.whse_sysid, t.create_ts
from transactions t
where json_exists(t.hstry_snpsht, '$?(@ == "matt")');
/* Case-insensitive equals match (same root-item caveat as above) */
select t.trnsct_hstry_sysid, t.whse_sysid, t.create_ts
from transactions t
where json_exists(
t.hstry_snpsht,
'$?(@.type() == "string" && lower(@) == "matt")'
);
/* Contains match via like_regex, flag "i" = case-insensitive
   (same root-item caveat as above) */
select t.trnsct_hstry_sysid, t.whse_sysid, t.create_ts
from transactions t
where json_exists(
t.hstry_snpsht,
'$?(@.type() == "string" && @ like_regex ".*matt.*" flag "i")'
);

6. When to Use Which Index

Use Case | Best Option
Equality on a known key | Function-based B-tree index
Range on a numeric field | Function-based B-tree index
JSON fields behave relationally / are used constantly | Virtual columns + normal indexes
Search anywhere / unknown JSON structure | JSON Search Index
Ad hoc text search across JSON content | JSON Search Index (+ JSON_TEXTCONTAINS)

Rule of thumb:

If you know the JSON keys you query on, treat them like columns (function-based indexes or virtual columns). If you need flexible “document search,” use a JSON Search Index.

Leave a comment

Navigation

About

I’m Matt and I do Oracle things.