metadata extraction

48 views
Skip to first unread message

whi...@gmail.com

unread,
Mar 2, 2009, 11:28:13 PM3/2/09
to Omeka Dev
I noticed that Omeka has a media metadata extraction function under Item
file upload, but I can only find the Exif metadata on the Item file
metadata page; the IPTC field is empty (even though the uploaded image
has IPTC metadata). Can we get other metadata such as IPTC or XML? Thanks!

Dave Lester

unread,
Mar 5, 2009, 1:48:37 PM3/5/09
to Omeka Dev
You mentioned on the forums that you were able to add some additional
code to fix the problems importing IPTC data from files. Could you
share the modifications to Omeka's code that you made?

whi...@gmail.com

unread,
Mar 5, 2009, 10:17:43 PM3/5/09
to Omeka Dev
Hi, here is the modified code for importing IPTC data:

<?php
/**
* @version $Id$
* @copyright Center for History and New Media, 2007-2008
* @license http://www.gnu.org/licenses/gpl-3.0.txt
* @package Omeka
**/

/**
* @todo Testing.
* @package Omeka
* @subpackage Models
* @author CHNM
* @copyright Center for History and New Media, 2007-2008
**/
class FilesImages
{
public $iptc_string;
public $iptc_array;

/**
 * Remembers the ID3 handle and file path, then extracts EXIF and IPTC
 * metadata from the image into $this->exif and $this->iptc.
 *
 * Both properties always end up as arrays; they are empty when the
 * corresponding metadata is missing or cannot be read.
 *
 * @param mixed $id3 Stored on $this->id3; not otherwise used in this method.
 * @param string $pathToFile Path of the image file to inspect.
 * @return void
 */
public function initialize($id3, $pathToFile)
{
    $this->id3 = $id3;
    $this->pathToFile = $pathToFile;

    // Extract EXIF data if possible. The @ suppresses any warning raised by
    // exif_read_data(); a falsy result is normalised to an empty array.
    $exif = function_exists('exif_read_data')
        ? @exif_read_data($this->pathToFile)
        : false;
    $this->exif = $exif ? $exif : array();

    // getimagesize() fills its second argument with the marker segments it
    // finds in the file; the IPTC block is read from the APP13 entry.
    $imageinfo = array();
    $imagesize = false;
    if (function_exists('getimagesize')) {
        $imagesize = getimagesize($this->pathToFile, $imageinfo);
    }

    // The dimension getters read $this->size, which nothing else in this
    // method assigns. Populate it from the getimagesize() result, but leave
    // any value that was already set elsewhere untouched.
    if ($imagesize !== false && !isset($this->size)) {
        $this->size = $imagesize;
    }

    // Extract IPTC data also if possible. Not every image carries an APP13
    // segment, so check for it instead of indexing blindly.
    $iptc = isset($imageinfo['APP13'])
        ? $this->get_iptc_info($imageinfo['APP13'])
        : array();
    $this->iptc = $iptc ? $iptc : array();
}

/**
 * Parses a raw IPTC block into an array keyed by readable field names.
 *
 * Only the record tags listed in the map below are kept. Multiple values of
 * one tag are joined with ", ". The creation date (2#055) is rewritten from
 * YYYYMMDD to DD.MM.YYYY; every other field is passed through replace_url().
 *
 * @param string $info Binary IPTC block to hand to iptcparse().
 * @return array Field name => string value; empty when nothing was parsed.
 */
private function get_iptc_info($info) {
    // IPTC "record#dataset" tags mapped to the names used in the result.
    $iptc_match = array(
        '2#120' => "caption",
        '2#122' => "caption_writer",
        '2#105' => "headline",
        '2#040' => "special_instructions",
        '2#080' => "byline",
        '2#085' => "byline_title",
        '2#110' => "credit",
        '2#115' => "source",
        '2#005' => "object_name",
        '2#055' => "date_created",
        '2#090' => "city",
        '2#095' => "state",
        '2#101' => "country",
        '2#103' => "original_transmission_reference",
        '2#015' => "category",
        '2#020' => "supplemental_category",
        '2#025' => "keyword",
        '2#116' => "copyright_notice"
    );

    // Nothing to parse: avoid handing null/empty input to iptcparse().
    if (!is_string($info) || $info === '') {
        return array();
    }

    $iptc = iptcparse($info);
    if (!is_array($iptc)) {
        return array();
    }

    $iptc_array = array();
    foreach ($iptc as $key => $val) {
        if (!isset($iptc_match[$key])) {
            continue;
        }

        // Join repeated values; a separator is only added once the
        // accumulated text is non-empty.
        $iptc_info = "";
        foreach ($val as $val2) {
            $iptc_info .= (($iptc_info != "") ? ", " : "") . $val2;
        }

        if ($key == "2#055") {
            // The pattern must be a single unbroken line: a line break inside
            // it would require a newline between year and month and never match.
            $iptc_array[$iptc_match[$key]] = preg_replace(
                "/([0-9]{4})([0-9]{2})([0-9]{2})/",
                "\\3.\\2.\\1",
                $iptc_info
            );
        } else {
            // NOTE(review): replace_url() emits HTML anchors around unescaped
            // file metadata - confirm that consumers expect markup here.
            $iptc_array[$iptc_match[$key]] = $this->replace_url($iptc_info);
        }
    }
    return $iptc_array;
}

/**
 * Wraps URLs and e-mail addresses found in a text in HTML anchor tags.
 *
 * Handles "scheme://..." URLs, bare "www." host names (linked via http://)
 * and, when the text contains an "@", e-mail addresses (mailto: links).
 *
 * @param string $text Plain text to scan.
 * @return string The text with links inserted, trimmed of surrounding whitespace.
 */
private function replace_url($text) {
    // Pad with spaces so a link at the very start still has the leading
    // delimiter character that every pattern requires.
    $text = " ".$text." ";

    // Each pattern is kept on one line: a line break inside a literal would
    // become part of the regex/replacement and silently break matching.
    $url_search_array = array(
        "#([^]_a-z0-9-=\"'\/])([a-z]+?)://([^, \(\)<>\n\r]+)#si",
        "#([^]_a-z0-9-=\"'\/])www\.([a-z0-9\-]+)\.([a-z0-9\-.\~]+)((?:/[^, \(\)<>\n\r]*)?)#si"
    );

    $url_replace_array = array(
        "\\1<a href=\"\\2://\\3\" target=\"_blank\">\\2://\\3</a>",
        "\\1<a href=\"http://www.\\2.\\3\\4\" target=\"_blank\">www.\\2.\\3\\4</a>"
    );
    $text = preg_replace($url_search_array, $url_replace_array, $text);

    // Only run the e-mail pattern when there is an "@" at all.
    if (strpos($text, "@") !== false) {
        $text = preg_replace(
            "#([\n ])([a-z0-9\-_.]+?)@([\w\-]+\.([\w\-\.]+\.)?[\w]+)#i",
            "\\1<a href=\"mailto:\\2@\\3\">\\2@\\3</a>",
            $text
        );
    }
    return trim($text);
}

/**
 * Returns element 0 of $this->size.
 *
 * NOTE(review): presumably $this->size is a getimagesize() result, making
 * this the width in pixels - confirm where size is assigned.
 *
 * @return mixed
 */
public function getWidth()
{
    $dimensions = $this->size;

    return $dimensions[0];
}

/**
 * Returns element 1 of $this->size.
 *
 * NOTE(review): presumably $this->size is a getimagesize() result, making
 * this the height in pixels - confirm where size is assigned.
 *
 * @return mixed
 */
public function getHeight()
{
    $dimensions = $this->size;

    return $dimensions[1];
}

/**
 * Returns the 'bits' entry of $this->size.
 *
 * NOTE(review): presumably $this->size is a getimagesize() result - confirm
 * where size is assigned.
 *
 * @return mixed
 */
public function getBitDepth()
{
    $imageSize = $this->size;

    return $imageSize['bits'];
}

/**
 * Returns the 'channels' entry of $this->size.
 *
 * NOTE(review): presumably $this->size is a getimagesize() result - confirm
 * where size is assigned.
 *
 * @return mixed
 */
public function getChannels()
{
    $imageSize = $this->size;

    return $imageSize['channels'];
}

/**
 * Returns the EXIF data in serialized form.
 *
 * Despite the name, the result is the serialize() string of $this->exif,
 * not an array.
 *
 * @return string|null Serialized EXIF data, or null when $this->exif is empty.
 */
public function getExifArray()
{
    // Nothing to serialize.
    if (empty($this->exif)) {
        return null;
    }

    return serialize($this->exif);
}

/**
 * Flattens $this->exif into a plain-text string suitable for storage.
 *
 * Scalar entries become "key:value" followed by a newline. Array entries
 * become "key:" on its own line, then one tab-indented "subkey:value" line
 * per element, then an extra newline.
 *
 * @return string Text dump of the EXIF data; empty string when there is none.
 */
public function getExifString()
{
    $chunks = array();

    foreach ($this->exif as $tag => $entry) {
        $chunk = $tag . ':';

        if (!is_array($entry)) {
            $chunk .= $entry;
        } else {
            // Nested section: list its members on indented lines.
            $chunk .= "\n";
            foreach ($entry as $subTag => $subValue) {
                $chunk .= "\t{$subTag}:{$subValue}\n";
            }
        }

        $chunks[] = $chunk . "\n";
    }

    return implode('', $chunks);
}

/**
 * Returns the value stored in $this->iptc.
 *
 * @return mixed Whatever $this->iptc currently holds.
 */
public function getIPTCArray()
{
    $iptcFields = $this->iptc;

    return $iptcFields;
}

public function getIPTCString()
{
return print_r($this->iptc, true);

Dave Lester

unread,
Mar 9, 2009, 3:38:11 AM3/9/09
to Omeka Dev
Is it possible for you to create an SVN patch and share that with the
group? If you're unsure how to do that, here's a link that may help:
http://ariejan.net/2007/07/03/how-to-create-and-apply-a-patch-with-subversion/

I'd like to review the specific changes you've made to this file.

Dave

On Mar 5, 11:17 pm, "whis...@gmail.com" <whis...@gmail.com> wrote:
> Hi, here is the modified code for importing IPTC data:
>
> <?php
> /**
>  * @version $Id$
>  * @copyright Center for History and New Media, 2007-2008
>  * @license http://www.gnu.org/licenses/gpl-3.0.txt

Junran

unread,
Mar 9, 2009, 11:03:39 PM3/9/09
to Omeka Dev
I have uploaded the diff file (iptc_code.diff) to the group.

Junran

On Mar 9, 6:38 pm, Dave Lester <daveles...@gmail.com> wrote:
> Is it possible for you to create an SVN patch and share that with the
> group? If you're unsure how to do that, here's a link that may help: http://ariejan.net/2007/07/03/how-to-create-and-apply-a-patch-with-subversion/

Dave Lester

unread,
Mar 23, 2009, 10:59:06 AM3/23/09
to Omeka Dev
Thanks for sharing the diff. I've added a ticket on Trac to review
these changes: https://omeka.org/trac/ticket/688

If you or anyone else would like access to Trac to comment on tickets,
send me an email (da...@omeka.org) and I'll gladly set you up.

Best,
Dave
Reply all
Reply to author
Forward
0 new messages