Difference between revisions of "SUNScholar/SQL Tips/4.X"

From Libopedia
Jump to navigation Jump to search
(Created page with "<center> '''BACK TO SQL TIPS''' </center>")
 
m
 
(One intermediate revision by the same user not shown)
Line 2: Line 2:
 
  '''[[SUNScholar/SQL Tips|BACK TO SQL TIPS]]'''
 
  '''[[SUNScholar/SQL Tips|BACK TO SQL TIPS]]'''
 
</center>
 
</center>
 +
;Display all embargoed items
 +
Here's a SQL query that will give you item handles for all bitstreams which have a policy with either a specified start date or end date:
 +
<pre>
 +
SELECT h.handle, rp.*
 +
FROM resourcepolicy rp, bundle2bitstream b2b, item2bundle i2b, handle h
 +
WHERE (start_date IS NOT NULL OR end_date IS NOT NULL)
 +
AND rp.resource_type_id = 0
 +
AND b2b.bitstream_id = rp.resource_id
 +
AND i2b.bundle_id = b2b.bundle_id
 +
AND i2b.item_id = h.resource_id
 +
AND h.resource_type_id = 2;
 +
</pre>
 +
;Display duplicate bitstream handles
 +
<pre>
 +
SELECT h.handle, COUNT(b.sequence_id)
 +
FROM handle h, item2bundle i2b, bundle2bitstream b2b, bitstream b
 +
WHERE h.resource_type_id = 2
 +
AND h.resource_id = i2b.item_id
 +
AND i2b.bundle_id = b2b.bundle_id
 +
AND b2b.bitstream_id = b.bitstream_id
 +
GROUP BY h.handle, b.sequence_id
 +
HAVING COUNT(b.sequence_id) > 1
 +
ORDER BY h.handle;
 +
</pre>
 +
;Move collection to another community.
 +
Find the collection id for the two collections.
 +
*UPDATE community2collection SET community_id=$NEW_PARENT WHERE community_id=$OLD_PARENT and collection_id=$COLLECTION;
 +
In this example moving items from 7 to 2
 +
*update collection2item set collection_id='2' where collection_id='7';
 +
*update collection_item_count set count='0' where collection_id='7';
 +
Change the new collection ID over to the existing collection
 +
*update item set owning_collection='2' where owning_collection='7';
 +
Then update item counts
 +
$HOME/bin/dspace itemcounter
 +
;Update/replace metadata
 +
*UPDATE metadatavalue SET text_value = 'Journal Article' WHERE text_value = 'Journal Articles'
 +
*UPDATE metadatavalue SET text_value = 'Article' WHERE text_value = (SELECT metadatavalue.text_value FROM public.metadatavalue WHERE metadatavalue.text_value = 'JournalArticles');  UPDATE +1
 +
 +
;Extract metadata
 +
*SELECT * FROM metadatavalue WHERE metadata_field_id = ( SELECT metadata_field_id FROM metadatafieldregistry WHERE element = 'date' AND qualifier = 'issued' )
 +
*SELECT * FROM metadatavalue WHERE metadata_field_id = ( SELECT metadata_field_id FROM metadatafieldregistry WHERE element = 'contributor' AND qualifier = 'advisor' )
 +
 +
;Search for metadata
 +
*SELECT * FROM metadatavalue WHERE text_value like '%Peer%';
 +
*SELECT metadatavalue.text_value FROM public.metadatavalue WHERE metadatavalue.text_value = 'JournalArticles';
 +
*SELECT handle, text_value FROM metadatavalue, handle WHERE metadatavalue.resource_id = handle.resource_id AND text_value LIKE E'%\u0017%';
 +
 +
;Find short issue dates
 +
*SELECT * FROM metadatavalue,metadatafieldregistry WHERE metadatavalue.metadata_field_id = metadatafieldregistry.metadata_field_id AND element = 'date' AND qualifier = 'issued' AND CHAR_LENGTH(text_value) < 4;
 +
 +
;Find long issue dates
 +
*SELECT * FROM metadatavalue,metadatafieldregistry WHERE metadatavalue.metadata_field_id = metadatafieldregistry.metadata_field_id AND element = 'date' AND qualifier = 'issued' AND CHAR_LENGTH(text_value) > 10;
 +
 +
;Find empty issue dates
 +
*SELECT item_id FROM metadatavalue WHERE metadata_field_id = ( SELECT metadata_field_id FROM metadatafieldregistry WHERE element = 'date' AND qualifier = 'issued' ) AND text_value = '';
 +
 +
;Empty type
 +
*SELECT item_id FROM metadatavalue WHERE metadata_field_id = ( SELECT metadata_field_id FROM metadatafieldregistry WHERE element = 'type') AND text_value = '';
 +
 +
;List items in collections
 +
*SELECT item_id FROM collection2item WHERE collection_id= '1297';
 +
 +
;List items in collections by title
 +
*SELECT metadatavalue.item_id, metadatavalue.text_value FROM public.collection2item, public.metadatavalue WHERE collection2item.item_id = metadatavalue.item_id AND collection_id = 1289 AND metadata_field_id = 64 ORDER BY metadatavalue.item_id ASC;
 +
 +
;Update/set template
 +
*UPDATE collection SET template_item_id = AAA FROM community2collection WHERE collection.collection_id = community2collection.collection_id AND community2collection.community_id = BBB;
 +
 +
;List embargoed items
 +
*SELECT item_id, text_value FROM metadatavalue WHERE metadata_field_id = ( SELECT metadata_field_id FROM metadatafieldregistry WHERE element = 'embargo' AND qualifier = 'lift' )
 +
 +
;IR Growth - For DSpace <= 4.X
 +
 +
<pre>
 +
SELECT * FROM ((SELECT 'dc.date.accessioned' AS field, substring(text_value FROM 1 FOR 4) AS year,substring(text_value FROM 6 FOR 2) AS month, COUNT(item_id) as numberOfDocs
 +
FROM metadatavalue WHERE metadata_field_id = (SELECT metadata_field_id FROM metadatafieldregistry WHERE metadata_schema_id = 1 AND element = 'date' AND qualifier = 'accessioned')
 +
GROUP BY year, month ORDER BY year,month) UNION (SELECT 'dc.date.available' AS field, substring(text_value FROM 1 FOR 4) AS year,substring(text_value FROM 6 FOR 2) AS month, COUNT(item_id) as numberOfDocs FROM metadatavalue WHERE metadata_field_id = (SELECT metadata_field_id FROM metadatafieldregistry WHERE metadata_schema_id = 1 AND element = 'date' AND qualifier = 'available') GROUP BY year, month ORDER BY year,month)) AS accessAvail ORDER BY field, year, month;
 +
</pre>
 +
 +
 +
 +
;List of uploaded bitstreams
 +
SELECT bitstream_id, name, size_bytes, source FROM bitstream ORDER BY bitstream_id ASC
 +
 +
;List of communities and collections with items
 +
<pre>
 +
select
 +
  c2c.community_id,
 +
  cmh.handle,
 +
  i.owning_collection,
 +
  ch.handle chandle,
 +
  i.item_id,
 +
  ih.handle ihandle
 +
from
 +
  item i
 +
inner join handle ch
 +
  on ch.resource_id=i.owning_collection
 +
  and ch.resource_type_id=3
 +
inner join handle ih
 +
  on ih.resource_id = i.item_id
 +
  and ih.resource_type_id = 2
 +
inner join community2collection c2c
 +
  on c2c.collection_id = i.owning_collection
 +
inner join handle cmh
 +
  on cmh.resource_id = c2c.community_id
 +
  and cmh.resource_type_id = 4
 +
</pre>
 +
 +
;No of bitstreams
 +
<pre>
 +
      select 1
 +
      from metadatavalue m
 +
      where m.item_id = i.item_id
 +
      and m.metadata_field_id = (
 +
        select metadata_field_id from metadatafieldregistry mfr
 +
        where mfr.element = 'description' and mfr.qualifier = 'provenance'
 +
      )
 +
      and (text_value ~ '^.*No\. of bitstreams.*\.(PDF|pdf|DOC|doc|PPT|ppt|DOCX|docx|PPTX|pptx)
 +
</pre>

Latest revision as of 12:47, 4 July 2016

BACK TO SQL TIPS
Display all embargoed items

Here's a SQL query that will give you item handles for all bitstreams which have a policy with either a specified start date or end date:

SELECT h.handle, rp.*
FROM resourcepolicy rp, bundle2bitstream b2b, item2bundle i2b, handle h
WHERE (start_date IS NOT NULL OR end_date IS NOT NULL)
AND rp.resource_type_id = 0
AND b2b.bitstream_id = rp.resource_id
AND i2b.bundle_id = b2b.bundle_id
AND i2b.item_id = h.resource_id
AND h.resource_type_id = 2;
Display duplicate bitstream handles
SELECT h.handle, COUNT(b.sequence_id)
FROM handle h, item2bundle i2b, bundle2bitstream b2b, bitstream b
WHERE h.resource_type_id = 2
AND h.resource_id = i2b.item_id
AND i2b.bundle_id = b2b.bundle_id
AND b2b.bitstream_id = b.bitstream_id
GROUP BY h.handle, b.sequence_id
HAVING COUNT(b.sequence_id) > 1
ORDER BY h.handle;
Move collection to another community.

Find the collection id for the two collections.

  • UPDATE community2collection SET community_id=$NEW_PARENT WHERE community_id=$OLD_PARENT and collection_id=$COLLECTION;

In this example moving items from 7 to 2

  • update collection2item set collection_id='2' where collection_id='7';
  • update collection_item_count set count='0' where collection_id='7';

Change the new collection ID over to the existing collection

  • update item set owning_collection='2' where owning_collection='7';

Then update item counts

$HOME/bin/dspace itemcounter
Update/replace metadata
  • UPDATE metadatavalue SET text_value = 'Journal Article' WHERE text_value = 'Journal Articles'
  • UPDATE metadatavalue SET text_value = 'Article' WHERE text_value = (SELECT metadatavalue.text_value FROM public.metadatavalue WHERE metadatavalue.text_value = 'JournalArticles'); UPDATE +1
Extract metadata
  • SELECT * FROM metadatavalue WHERE metadata_field_id = ( SELECT metadata_field_id FROM metadatafieldregistry WHERE element = 'date' AND qualifier = 'issued' )
  • SELECT * FROM metadatavalue WHERE metadata_field_id = ( SELECT metadata_field_id FROM metadatafieldregistry WHERE element = 'contributor' AND qualifier = 'advisor' )
Search for metadata
  • SELECT * FROM metadatavalue WHERE text_value like '%Peer%';
  • SELECT metadatavalue.text_value FROM public.metadatavalue WHERE metadatavalue.text_value = 'JournalArticles';
  • SELECT handle, text_value FROM metadatavalue, handle WHERE metadatavalue.resource_id = handle.resource_id AND text_value LIKE E'%\u0017%';
Find short issue dates
  • SELECT * FROM metadatavalue,metadatafieldregistry WHERE metadatavalue.metadata_field_id = metadatafieldregistry.metadata_field_id AND element = 'date' AND qualifier = 'issued' AND CHAR_LENGTH(text_value) < 4;
Find long issue dates
  • SELECT * FROM metadatavalue,metadatafieldregistry WHERE metadatavalue.metadata_field_id = metadatafieldregistry.metadata_field_id AND element = 'date' AND qualifier = 'issued' AND CHAR_LENGTH(text_value) > 10;
Find empty issue dates
  • SELECT item_id FROM metadatavalue WHERE metadata_field_id = ( SELECT metadata_field_id FROM metadatafieldregistry WHERE element = 'date' AND qualifier = 'issued' ) AND text_value = ;
Empty type
  • SELECT item_id FROM metadatavalue WHERE metadata_field_id = ( SELECT metadata_field_id FROM metadatafieldregistry WHERE element = 'type') AND text_value = ;
List items in collections
  • SELECT item_id FROM collection2item WHERE collection_id= '1297';
List items in collections by title
  • SELECT metadatavalue.item_id, metadatavalue.text_value FROM public.collection2item, public.metadatavalue WHERE collection2item.item_id = metadatavalue.item_id AND collection_id = 1289 AND metadata_field_id = 64 ORDER BY metadatavalue.item_id ASC;
Update/set template
  • UPDATE collection SET template_item_id = AAA FROM community2collection WHERE collection.collection_id = community2collection.collection_id AND community2collection.community_id = BBB;
List embargoed items
  • SELECT item_id, text_value FROM metadatavalue WHERE metadata_field_id = ( SELECT metadata_field_id FROM metadatafieldregistry WHERE element = 'embargo' AND qualifier = 'lift' )
IR Growth - For DSpace <= 4.X
SELECT * FROM ((SELECT 'dc.date.accessioned' AS field, substring(text_value FROM 1 FOR 4) AS year,substring(text_value FROM 6 FOR 2) AS month, COUNT(item_id) as numberOfDocs
FROM metadatavalue WHERE metadata_field_id = (SELECT metadata_field_id FROM metadatafieldregistry WHERE metadata_schema_id = 1 AND element = 'date' AND qualifier = 'accessioned')
GROUP BY year, month ORDER BY year,month) UNION (SELECT 'dc.date.available' AS field, substring(text_value FROM 1 FOR 4) AS year,substring(text_value FROM 6 FOR 2) AS month, COUNT(item_id) as numberOfDocs FROM metadatavalue WHERE metadata_field_id = (SELECT metadata_field_id FROM metadatafieldregistry WHERE metadata_schema_id = 1 AND element = 'date' AND qualifier = 'available') GROUP BY year, month ORDER BY year,month)) AS accessAvail ORDER BY field, year, month;


List of uploaded bitstreams

SELECT bitstream_id, name, size_bytes, source FROM bitstream ORDER BY bitstream_id ASC

List of communities and collections with items
select
  c2c.community_id,
  cmh.handle,
  i.owning_collection,
  ch.handle chandle,
  i.item_id,
  ih.handle ihandle
from
  item i
inner join handle ch
  on ch.resource_id=i.owning_collection
  and ch.resource_type_id=3
inner join handle ih
  on ih.resource_id = i.item_id
  and ih.resource_type_id = 2
inner join community2collection c2c
  on c2c.collection_id = i.owning_collection
inner join handle cmh 
  on cmh.resource_id = c2c.community_id
  and cmh.resource_type_id = 4
No of bitstreams
      select 1
      from metadatavalue m 
      where m.item_id = i.item_id
      and m.metadata_field_id = (
        select metadata_field_id from metadatafieldregistry mfr
        where mfr.element = 'description' and mfr.qualifier = 'provenance'
      )
      and (text_value ~ '^.*No\. of bitstreams.*\.(PDF|pdf|DOC|doc|PPT|ppt|DOCX|docx|PPTX|pptx)