Schema Changes: 2.2.3
Upgrade: 2.2.2 → 2.2.3
This release applied 3 migration(s) to the database schema.
| ID | Type | Description |
|---|---|---|
schema |
vandelay.import match no like any |
|
schema |
checkless browse fix |
|
schema |
maintain control number oclc |
Migration Details
738 — vandelay.import match no like any
Type: schema
View SQL
-- 0738.schema.vandelay.import-match-no-like-any.sql
CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml(
match_set_id INTEGER, record_xml TEXT
) RETURNS SETOF vandelay.match_set_test_result AS $$
DECLARE
tags_rstore HSTORE;
svf_rstore HSTORE;
coal TEXT;
joins TEXT;
query_ TEXT;
wq TEXT;
qvalue INTEGER;
rec RECORD;
tags_rstore := vandelay.flatten_marc_hstore(record_xml);
svf_rstore := vandelay.extract_rec_attrs(record_xml);
CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER);
CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT);
-- generate the where clause and return that directly (into wq), and as
-- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore);
query_ := 'SELECT DISTINCT(record), ';
-- qrows table is for the quality bits we add to the SELECT clause
SELECT ARRAY_TO_STRING(
ARRAY_ACCUM('COALESCE(n' || q::TEXT || '.quality, 0)'), ' + '
) INTO coal FROM _vandelay_tmp_qrows;
-- our query string so far is the SELECT clause and the inital FROM.
-- no JOINs yet nor the WHERE clause
query_ := query_ || coal || ' AS quality ' || E'\n';
-- jrows table is for the joins we must make (and the real text conditions)
SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins
FROM _vandelay_tmp_jrows;
-- add those joins and the where clause to our query.
query_ := query_ || joins || E'\n' || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted';
-- this will return rows of record,quality
FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP
RETURN NEXT rec;
END LOOP;
DROP TABLE _vandelay_tmp_qrows;
DROP TABLE _vandelay_tmp_jrows;
RETURN;
END;
$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
match_set_id INTEGER,
tags_rstore HSTORE
) RETURNS TEXT AS $$
DECLARE
root vandelay.match_set_point;
SELECT * INTO root FROM vandelay.match_set_point
WHERE parent IS NULL AND match_set = match_set_id;
RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore);
END;
$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
node vandelay.match_set_point,
tags_rstore HSTORE
) RETURNS TEXT AS $$
DECLARE
q TEXT;
i INTEGER;
this_op TEXT;
children INTEGER[];
child vandelay.match_set_point;
SELECT ARRAY_ACCUM(id) INTO children FROM vandelay.match_set_point
WHERE parent = node.id;
IF ARRAY_LENGTH(children, 1) > 0 THEN
this_op := vandelay._get_expr_render_one(node);
q := '(';
i := 1;
WHILE children[i] IS NOT NULL LOOP
SELECT * INTO child FROM vandelay.match_set_point
WHERE id = children[i];
IF i > 1 THEN
q := q || ' ' || this_op || ' ';
END IF;
i := i + 1;
q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore);
END LOOP;
q := q || ')';
RETURN q;
ELSIF node.bool_op IS NULL THEN
PERFORM vandelay._get_expr_push_qrow(node);
PERFORM vandelay._get_expr_push_jrow(node, tags_rstore);
RETURN vandelay._get_expr_render_one(node);
ELSE
RETURN '';
END IF;
END;
$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
node vandelay.match_set_point,
tags_rstore HSTORE
) RETURNS VOID AS $$
DECLARE
jrow TEXT;
my_alias TEXT;
op TEXT;
tagkey TEXT;
caseless BOOL;
jrow_count INT;
my_using TEXT;
my_join TEXT;
-- remember $1 is tags_rstore, and $2 is svf_rstore
caseless := FALSE;
SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
IF jrow_count > 0 THEN
my_using := ' USING (record)';
my_join := 'FULL OUTER JOIN';
ELSE
my_using := '';
my_join := 'FROM';
END IF;
IF node.tag IS NOT NULL THEN
caseless := (node.tag IN ('020', '022', '024'));
tagkey := node.tag;
IF node.subfield IS NOT NULL THEN
tagkey := tagkey || node.subfield;
END IF;
END IF;
IF node.negate THEN
IF caseless THEN
op := 'NOT LIKE';
ELSE
op := '<>';
END IF;
ELSE
IF caseless THEN
op := 'LIKE';
ELSE
op := '=';
END IF;
END IF;
my_alias := 'n' || node.id::TEXT;
jrow := my_join || ' (SELECT *, ';
IF node.tag IS NOT NULL THEN
jrow := jrow || node.quality ||
' AS quality FROM metabib.full_rec mfr WHERE mfr.tag = ''' ||
node.tag || '''';
IF node.subfield IS NOT NULL THEN
jrow := jrow || ' AND mfr.subfield = ''' ||
node.subfield || '''';
END IF;
jrow := jrow || ' AND (';
jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
jrow := jrow || ')) ' || my_alias || my_using || E'\n';
ELSE -- svf
jrow := jrow || 'id AS record, ' || node.quality ||
' AS quality FROM metabib.record_attr mra WHERE mra.attrs->''' ||
node.svf || ''' ' || op || ' $2->''' || node.svf || ''') ' ||
my_alias || my_using || E'\n';
END IF;
INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
END;
$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons(
caseless BOOLEAN,
op TEXT,
tags_rstore HSTORE,
tagkey TEXT
) RETURNS TEXT AS $$
DECLARE
result TEXT;
i INT;
vals TEXT[];
i := 1;
vals := tags_rstore->tagkey;
result := '';
WHILE TRUE LOOP
IF i > 1 THEN
IF vals[i] IS NULL THEN
EXIT;
ELSE
result := result || ' OR ';
END IF;
END IF;
IF caseless THEN
result := result || 'LOWER(mfr.value) ' || op;
ELSE
result := result || 'mfr.value ' || op;
END IF;
result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL');
IF vals[i] IS NULL THEN
EXIT;
END IF;
i := i + 1;
END LOOP;
RETURN result;
END;
$$ LANGUAGE PLPGSQL;
-- drop old versions of these functions with fewer args
DROP FUNCTION vandelay.get_expr_from_match_set( INTEGER );
DROP FUNCTION vandelay.get_expr_from_match_set_point( vandelay.match_set_point );
DROP FUNCTION vandelay._get_expr_push_jrow( vandelay.match_set_point );
-- This next index might fully supplant an existing one but leaving both for now
-- (they are not too large)
-- The reason we need this index is to ensure that the query parser always
-- prefers this index over the simpler tag/subfield index, as this greatly
-- increases Vandelay overlay speed for these identifiers, especially when
-- a record has many of these fields (around > 4-6 seems like the cutoff
-- on at least one PG9.1 system)
-- A similar index could be added for other fields (e.g. 010), but one should
-- leave out the LOWER() in all other cases.
-- TODO: verify whether we can discard the non tag/subfield/substring version
-- (metabib_full_rec_isxn_caseless_idx)
CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
WHERE tag IN ('020', '022', '024');
740 — checkless browse fix
Type: schema
View SQL
CREATE OR REPLACE
FUNCTION metabib.suggest_browse_entries(
raw_query_text TEXT, -- actually typed by humans at the UI level
search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
headline_opts TEXT, -- markup options for ts_headline()
visibility_org INTEGER,-- null if you don't want opac visibility test
query_limit INTEGER,-- use in LIMIT clause of interal query
normalization INTEGER -- argument to TS_RANK_CD()
) RETURNS TABLE (
value TEXT, -- plain
field INTEGER,
buoyant_and_class_match BOOL,
field_match BOOL,
field_weight INTEGER,
rank REAL,
buoyant BOOL,
match TEXT -- marked up
) AS $func$
DECLARE
prepared_query_texts TEXT[];
query TSQUERY;
plain_query TSQUERY;
opac_visibility_join TEXT;
search_class_join TEXT;
r_fields RECORD;
prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
query := TO_TSQUERY('keyword', prepared_query_texts[1]);
plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
visibility_org := NULLIF(visibility_org,-1);
IF visibility_org IS NOT NULL THEN
opac_visibility_join := '
JOIN asset.opac_visible_copies aovc ON (
aovc.record = x.source AND
aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
)';
ELSE
opac_visibility_join := '';
END IF;
-- The following determines whether we only provide suggestsons matching
-- the user's selected search_class, or whether we show other suggestions
-- too. The reason for MIN() is that for search_classes like
-- 'title|proper|uniform' you would otherwise get multiple rows. The
-- implication is that if title as a class doesn't have restrict,
-- nor does the proper field, but the uniform field does, you're going
-- to get 'false' for your overall evaluation of 'should we restrict?'
-- To invert that, change from MIN() to MAX().
SELECT
INTO r_fields
MIN(cmc.restrict::INT) AS restrict_class,
MIN(cmf.restrict::INT) AS restrict_field
FROM metabib.search_class_to_registered_components(search_class)
AS _registered (field_class TEXT, field INT)
JOIN
config.metabib_class cmc ON (cmc.name = _registered.field_class)
LEFT JOIN
config.metabib_field cmf ON (cmf.id = _registered.field);
-- evaluate 'should we restrict?'
IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
search_class_join := '
JOIN
metabib.search_class_to_registered_components($2)
AS _registered (field_class TEXT, field INT) ON (
(_registered.field IS NULL AND
_registered.field_class = cmf.field_class) OR
(_registered.field = cmf.id)
)
';
ELSE
search_class_join := '
LEFT JOIN
metabib.search_class_to_registered_components($2)
AS _registered (field_class TEXT, field INT) ON (
_registered.field_class = cmc.name
)
';
END IF;
RETURN QUERY EXECUTE '
SELECT DISTINCT
x.value,
x.id,
x.push,
x.restrict,
x.weight,
x.ts_rank_cd,
x.buoyant,
TS_HEADLINE(value, $7, $3)
FROM (SELECT DISTINCT
mbe.value,
cmf.id,
cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
_registered.field = cmf.id AS restrict,
cmf.weight,
TS_RANK_CD(mbe.index_vector, $1, $6),
cmc.buoyant,
mbedm.source
FROM metabib.browse_entry_def_map mbedm
JOIN (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000) mbe ON (mbe.id = mbedm.entry)
JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
' || search_class_join || '
ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
LIMIT 1000) AS x
' || opac_visibility_join || '
ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
LIMIT $5
' -- sic, repeat the order by clause in the outer select too
USING
query, search_class, headline_opts,
visibility_org, query_limit, normalization, plain_query
;
-- sort order:
-- buoyant AND chosen class = match class
-- chosen field = match field
-- field weight
-- rank
-- buoyancy
-- value itself
END;
$func$ LANGUAGE PLPGSQL;
742 — maintain control number oclc
Type: schema
View SQL
-- Prepare for the July 2013 introduction of OCLC's "on" prefix
-- Per LP# 1049171
CREATE OR REPLACE FUNCTION maintain_control_numbers() RETURNS TRIGGER AS $func$
use strict;
use MARC::Record;
use MARC::File::XML (BinaryEncoding => 'UTF-8');
use MARC::Charset;
use Encode;
use Unicode::Normalize;
MARC::Charset->assume_unicode(1);
my $record = MARC::Record->new_from_xml($_TD->{new}{marc});
my $schema = $_TD->{table_schema};
my $rec_id = $_TD->{new}{id};
# Short-circuit if maintaining control numbers per MARC21 spec is not enabled
my $enable = spi_exec_query("SELECT enabled FROM config.global_flag WHERE name = 'cat.maintain_control_numbers'");
if (!($enable->{processed}) or $enable->{rows}[0]->{enabled} eq 'f') {
return;
}
# Get the control number identifier from an OU setting based on $_TD->{new}{owner}
my $ou_cni = 'EVRGRN';
my $owner;
if ($schema eq 'serial') {
$owner = $_TD->{new}{owning_lib};
} else {
# are.owner and bre.owner can be null, so fall back to the consortial setting
$owner = $_TD->{new}{owner} || 1;
}
my $ous_rv = spi_exec_query("SELECT value FROM actor.org_unit_ancestor_setting('cat.marc_control_number_identifier', $owner)");
if ($ous_rv->{processed}) {
$ou_cni = $ous_rv->{rows}[0]->{value};
$ou_cni =~ s/"//g; # Stupid VIM syntax highlighting"
} else {
# Fall back to the shortname of the OU if there was no OU setting
$ous_rv = spi_exec_query("SELECT shortname FROM actor.org_unit WHERE id = $owner");
if ($ous_rv->{processed}) {
$ou_cni = $ous_rv->{rows}[0]->{shortname};
}
}
my ($create, $munge) = (0, 0);
my @scns = $record->field('035');
foreach my $id_field ('001', '003') {
my $spec_value;
my @controls = $record->field($id_field);
if ($id_field eq '001') {
$spec_value = $rec_id;
} else {
$spec_value = $ou_cni;
}
# Create the 001/003 if none exist
if (scalar(@controls) == 1) {
# Only one field; check to see if we need to munge it
unless (grep $_->data() eq $spec_value, @controls) {
$munge = 1;
}
} else {
# Delete the other fields, as with more than 1 001/003 we do not know which 003/001 to match
foreach my $control (@controls) {
$record->delete_field($control);
}
$record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value));
$create = 1;
}
}
my $cn = $record->field('001')->data();
# Special handling of OCLC numbers, often found in records that lack 003
if ($cn =~ /^o(c[nm]|n)\d/) {
$cn =~ s/^o(c[nm]|n)0*(\d+)/$2/;
$record->field('003')->data('OCoLC');
$create = 0;
}
# Now, if we need to munge the 001, we will first push the existing 001/003
# into the 035; but if the record did not have one (and one only) 001 and 003
# to begin with, skip this process
if ($munge and not $create) {
my $scn = "(" . $record->field('003')->data() . ")" . $cn;
# Do not create duplicate 035 fields
unless (grep $_->subfield('a') eq $scn, @scns) {
$record->insert_fields_ordered(MARC::Field->new('035', '', '', 'a' => $scn));
}
}
# Set the 001/003 and update the MARC
if ($create or $munge) {
$record->field('001')->data($rec_id);
$record->field('003')->data($ou_cni);
my $xml = $record->as_xml_record();
$xml =~ s/\n//sgo;
$xml =~ s/^<\?xml.+\?\s*>//go;
$xml =~ s/>\s+</></go;
$xml =~ s/\p{Cc}//go;
# Embed a version of OpenILS::Application::AppUtils->entityize()
# to avoid having to set PERL5LIB for PostgreSQL as well
# If we are going to convert non-ASCII characters to XML entities,
# we had better be dealing with a UTF8 string to begin with
$xml = decode_utf8($xml);
$xml = NFC($xml);
# Convert raw ampersands to entities
$xml =~ s/&(?!\S+;)/&/gso;
# Convert Unicode characters to entities
$xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
$xml =~ s/[\x00-\x1f]//go;
$_TD->{new}{marc} = $xml;
return "MODIFY";
}
return;
$func$ LANGUAGE PLPERLU;