diff options
author | gregor herrmann <gregoa@debian.org> | 2024-03-16 23:57:00 +0100 |
---|---|---|
committer | gregor herrmann <gregoa@debian.org> | 2024-03-16 23:57:00 +0100 |
commit | 0a5cff0c2b2e81cf07b6c6dde3a01eb5a86ce1b4 (patch) | |
tree | de280dab88ee9c1eae9df83447577cdc277fe4ec | |
parent | 16ce6ba60b24fcbe8f32f50d69e32b46aee27655 (diff) |
New upstream version 0.18
-rw-r--r-- | Changes | 74 | ||||
-rw-r--r-- | MANIFEST | 46 | ||||
-rw-r--r-- | META.json | 4 | ||||
-rw-r--r-- | META.yml | 4 | ||||
-rw-r--r-- | README | 12 | ||||
-rw-r--r-- | lib/Spreadsheet/XLSX.pm | 812 | ||||
-rw-r--r-- | lib/Spreadsheet/XLSX/Fmt2007.pm | 61 | ||||
-rw-r--r-- | lib/Spreadsheet/XLSX/Utility2007.pm | 2 |
8 files changed, 523 insertions, 492 deletions
@@ -1,87 +1,91 @@ Revision history for Perl extension Spreadsheet::XLSX. +0.18 + + - fix RT #117166: unformatted date values can no longer be retrieved, thx to lherschi + 0.17 - - fix RT #139898: missing file from MANIFEST + - fix RT #139898: missing file from MANIFEST - fix RT #127829, #80565, #79016: [Warning] Argument "#N/A" isn't numeric in int 0.16 - - added GitHub repository - - fix RT #125112: Update module name in comments and POD - - improve POD - - improve kwalitee test + - added GitHub repository + - fix RT #125112: Update module name in comments and POD + - improve POD + - improve kwalitee test 0.15 - - revert a numeric formatting change which caused isssues with Spreadsheet::Read. + - revert a numeric formatting change which caused isssues with Spreadsheet::Read. 0.14 - - change default date format to yyyy-mm-dd. This matches Spreadsheet::ParseExcel. - - handle xml tag attributes in varying order (RT #86667, et.al.) + - change default date format to yyyy-mm-dd. This matches Spreadsheet::ParseExcel. + - handle xml tag attributes in varying order (RT #86667, et.al.) 0.13 Sun May 16 13:08:12 MSD 2010 - - ability to read xlsx from filehandle (RT #57483, thanks Sergey Pushkin) + - ability to read xlsx from filehandle (RT #57483, thanks Sergey Pushkin) 0.12 Tue Oct 6 10:04:37 MSD 2009 - - sheets are now detected by relations (xl/_rels/workbook.xml.rels), not numbers (RT #50236, thanks Pat Mariani) + - sheets are now detected by relations (xl/_rels/workbook.xml.rels), not numbers (RT #50236, thanks Pat Mariani) 0.11 Mon Oct 5 19:03:46 MSD 2009 - - sheets numbering fixed (RT #50211, thanks endacoe) + - sheets numbering fixed (RT #50211, thanks endacoe) 0.1 Wed Mar 25 18:19:46 MSK 2009 - - bypassing empty sheets (thanks Lukasz Wilun for an example with diagrams); - - rich text within a cell (by Rob Polocz); + - bypassing empty sheets (thanks Lukasz Wilun for an example with diagrams); + - rich text within a cell (by Rob Polocz); 0.09 Mon Jan 26 09:57:04 MSK 2009 - - fixed using of $1 after unsuccessful m// (thanks HMBRAND); + - fixed using of $1 after unsuccessful m// (thanks HMBRAND); 0.08 Mon Jan 5 18:07:57 MSK 2009 - - Perl version requirement lowered (thanks HMBRAND) + - Perl version requirement lowered (thanks HMBRAND) -0.07 Thu Dec 18 09:46:18 MSK 2008 +0.07 Thu Dec 18 09:46:18 MSK 2008 - - "Use of uninitialized value ..." warnings suppressed (thanks Gregor Herrmann) - + - "Use of uninitialized value ..." warnings suppressed (thanks Gregor Herrmann) + 0.04 Fri Nov 14 09:25:47 MSK 2008 - - RE fixed for the case of opening <t ...> with attributes (thanks Loreyna Yeung) + - RE fixed for the case of opening <t ...> with attributes (thanks Loreyna Yeung) 0.06 Tue Dec 16 18:06:17 MSK 2008 - - files missed from MANIFEST and, therefore, from .tar.gz :-( Sorry, everybody! + - files missed from MANIFEST and, therefore, from .tar.gz :-( Sorry, everybody! 0.05 Fri Dec 12 17:28:23 MSK 2008 - - - a lot of fixes by Rob Polocz (dependency on Spreadsheet::ParseExcel introduced): - - -- Added support for styles and formatted strings; - -- create and use ParseExcel Workbook, Spreadsheet, and Cell objects; - -- 1904 date convention support; - -- empty tag support; - -- received permission from the Spreadsheet::ParseExcel guys to leverage the formatting classes and check them in to this project. + + - a lot of fixes by Rob Polocz (dependency on Spreadsheet::ParseExcel introduced): + + -- Added support for styles and formatted strings; + -- create and use ParseExcel Workbook, Spreadsheet, and Cell objects; + -- 1904 date convention support; + -- empty tag support; + -- received permission from the Spreadsheet::ParseExcel guys to leverage the formatting classes and check them in to this project. 0.03 Tue May 20 05:18:41 UTC 2008 - - fixed the incorrect mapping for columns > AA (thanks JMELTZER http://rt.cpan.org/Public/Bug/Display.html?id=36013) - - added support for r:id attribute (Ibid) + - fixed the incorrect mapping for columns > AA (thanks JMELTZER http://rt.cpan.org/Public/Bug/Display.html?id=36013) + - added support for r:id attribute (Ibid) 0.02 Mon May 12 07:02:31 UTC 2008 - - fixed a bug with 1st dictionnary string (thanks SSIMMS: http://rt.cpan.org/Public/Bug/Display.html?id=35489) - - prereq relaxed (thanx SSIMMS: http://rt.cpan.org/Public/Bug/Display.html?id=35490) + - fixed a bug with 1st dictionnary string (thanks SSIMMS: http://rt.cpan.org/Public/Bug/Display.html?id=35489) + - prereq relaxed (thanx SSIMMS: http://rt.cpan.org/Public/Bug/Display.html?id=35490) 0.01 Thu Mar 13 09:41:52 2008 - - original version; created by h2xs 1.23 with options - -A -X -n Spreadsheet::XLSC --skip-exporter --skip-autoloader + - original version; created by h2xs 1.23 with options + -A -X -n Spreadsheet::XLSC --skip-exporter --skip-autoloader - + @@ -1,23 +1,23 @@ -Changes -lib/Spreadsheet/XLSX.pm -lib/Spreadsheet/XLSX/Fmt2007.pm -lib/Spreadsheet/XLSX/Utility2007.pm -Makefile.PL -MANIFEST This list of files -META.yml Module meta-data (added by MakeMaker) -README -t/0____________use.t -t/1_____loreyna126.t -t/1_____loreyna126.xlsx -t/2_____with_chart.t -t/2_____with_chart.xlsx -t/3_____invalid_formular.t -t/3_____invalid_formular.xlsx -t/empty_v_tag.t -t/empty_v_tag.xlsx -t/formats.t -t/formats.xlsx -t/kwalitee.t -t/missing_styles.t -t/missing_styles.xlsx -META.json Module JSON meta-data (added by MakeMaker) +Changes
+lib/Spreadsheet/XLSX.pm
+lib/Spreadsheet/XLSX/Fmt2007.pm
+lib/Spreadsheet/XLSX/Utility2007.pm
+Makefile.PL
+MANIFEST This list of files
+META.yml Module meta-data (added by MakeMaker)
+README
+t/0____________use.t
+t/1_____loreyna126.t
+t/1_____loreyna126.xlsx
+t/2_____with_chart.t
+t/2_____with_chart.xlsx
+t/3_____invalid_formular.t
+t/3_____invalid_formular.xlsx
+t/empty_v_tag.t
+t/empty_v_tag.xlsx
+t/formats.t
+t/formats.xlsx
+t/kwalitee.t
+t/missing_styles.t
+t/missing_styles.xlsx
+META.json Module JSON meta-data (added by MakeMaker)
@@ -1,5 +1,5 @@ { - "abstract" : "Perl extension for reading MS Excel 2007 files;", + "abstract" : "Perl extension for reading MS Excel 2007 files.", "author" : [ "Dmitry Ovsyanko <do@eludia.ru>", "Mike Blackwell <mike.blackwell@rrd.com" @@ -51,6 +51,6 @@ "url" : "https://github.com/asb-capfan/Spreadsheet-XLSX" } }, - "version" : "0.17", + "version" : "0.18", "x_serialization_backend" : "JSON::PP version 4.04" } @@ -1,5 +1,5 @@ --- -abstract: 'Perl extension for reading MS Excel 2007 files;' +abstract: 'Perl extension for reading MS Excel 2007 files.' author: - 'Dmitry Ovsyanko <do@eludia.ru>' - 'Mike Blackwell <mike.blackwell@rrd.com' @@ -26,5 +26,5 @@ requires: Spreadsheet::ParseExcel: '0' resources: repository: https://github.com/asb-capfan/Spreadsheet-XLSX -version: '0.17' +version: '0.18' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' @@ -19,6 +19,18 @@ This module requires these other modules and libraries: Archive::Zip Spreadsheet::ParseExcel +BUILD AND RELEASE INSTRUCTIONS + +Note: Meta files (META.json and META.yml) have to be generated using ExtUtils::MakeMaker to satisfy the MANIFEST file. + +Use the commands: + + perl Makefile.PL + make distdir + make disttest + make dist + + COPYRIGHT AND LICENCE Copyright (C) 2008 by Dmitry Ovsyanko diff --git a/lib/Spreadsheet/XLSX.pm b/lib/Spreadsheet/XLSX.pm index 40819db..21f1369 100644 --- a/lib/Spreadsheet/XLSX.pm +++ b/lib/Spreadsheet/XLSX.pm @@ -1,406 +1,406 @@ -package Spreadsheet::XLSX; - -use 5.006000; -use strict; -use warnings; - -use base 'Spreadsheet::ParseExcel::Workbook'; - -our $VERSION = '0.17'; - -use Archive::Zip; -use Spreadsheet::ParseExcel; -use Spreadsheet::XLSX::Fmt2007; - -################################################################################ - -sub new { - my ($class, $filename, $converter) = @_; - - my %shared_info; # shared_strings, styles, style_info, rels, converter - $shared_info{converter} = $converter; - - my $self = bless Spreadsheet::ParseExcel::Workbook->new(), $class; - - my $zip = __load_zip($filename); - - $shared_info{shared_strings}= __load_shared_strings($zip, $shared_info{converter}); - my ($styles, $style_info) = __load_styles($zip); - $shared_info{styles} = $styles; - $shared_info{style_info} = $style_info; - $shared_info{rels} = __load_rels($zip); - - $self->_load_workbook($zip, \%shared_info); - - return $self; -} - -sub _load_workbook { - my ($self, $zip, $shared_info) = @_; - - my $member_workbook = $zip->memberNamed('xl/workbook.xml') or die("xl/workbook.xml not found in this zip\n"); - $self->{SheetCount} = 0; - $self->{FmtClass} = Spreadsheet::XLSX::Fmt2007->new; - $self->{Flg1904} = 0; - if ($member_workbook->contents =~ /date1904="1"/) { - $self->{Flg1904} = 1; - } - - foreach ($member_workbook->contents =~ /\<(.*?)\/?\>/g) { - - /^(\w+)\s+/; - - my ($tag, $other) = ($1, $'); - - my @pairs = split /\" /, $other; - - $tag eq 'sheet' or next; - - my $sheet = { - MaxRow => 0, - MaxCol => 0, - MinRow => 1000000, - MinCol => 1000000, - }; - - foreach ($other =~ /(\S+=".*?")/gsm) { - - my ($k, $v) = split /=?"/; #" - - if ($k eq 'name') { - $sheet->{Name} = $v; - $sheet->{Name} = $shared_info->{converter}->convert($sheet->{Name}) if defined $shared_info->{converter}; - } elsif ($k eq 'r:id') { - - $sheet->{path} = $shared_info->{rels}->{$v}; - - } - - } - - my $wsheet = Spreadsheet::ParseExcel::Worksheet->new(%$sheet); - $self->{Worksheet}[$self->{SheetCount}] = $wsheet; - $self->{SheetCount} += 1; - - } - - - foreach my $sheet (@{$self->{Worksheet}}) { - - my $member_sheet = $zip->memberNamed("xl/$sheet->{path}") or next; - - my ($row, $col); - - my $parsing_v_tag = 0; - my $s = 0; - my $s2 = 0; - my $sty = 0; - foreach ($member_sheet->contents =~ /(\<.*?\/?\>|.*?(?=\<))/g) { - if (/^\<c\s*.*?\s*r=\"([A-Z])([A-Z]?)(\d+)\"/) { - - ($row, $col) = __decode_cell_name($1, $2, $3); - - $s = m/t=\"s\"/ ? 1 : 0; - $s2 = m/t=\"str\"/ ? 1 : 0; - $sty = m/s="([0-9]+)"/ ? $1 : 0; - - } elsif (/^<v>/) { - $parsing_v_tag = 1; - } elsif (/^<\/v>/) { - $parsing_v_tag = 0; - } elsif (length($_) && $parsing_v_tag) { - my $v = $s ? $shared_info->{shared_strings}->[$_] : $_; - - if ($v eq "</c>") { - $v = ""; - } - my $type = "Text"; - my $thisstyle = ""; - - if (not($s) && not($s2)) { - $type = "Numeric"; - - if (defined $sty && defined $shared_info->{styles}->[$sty]) { - $thisstyle = $shared_info->{style_info}->{$shared_info->{styles}->[$sty]}; - if ($thisstyle =~ /\b(mmm|m|d|yy|h|hh|mm|ss)\b/) { - $type = "Date"; - } - } - } - - - $sheet->{MaxRow} = $row if $sheet->{MaxRow} < $row; - $sheet->{MaxCol} = $col if $sheet->{MaxCol} < $col; - $sheet->{MinRow} = $row if $sheet->{MinRow} > $row; - $sheet->{MinCol} = $col if $sheet->{MinCol} > $col; - - if ($v =~ /(.*)E\-(.*)/gsm && $type eq "Numeric") { - $v = $1 / (10**$2); # this handles scientific notation for very small numbers - } - - my $cell = Spreadsheet::ParseExcel::Cell->new( - Val => $v, - Format => $thisstyle, - Type => $type - ); - - $cell->{_Value} = $self->{FmtClass}->ValFmt($cell, $self); - if ($type eq "Date") { - if ($v < 1) { #then this is Excel time field - $cell->{Type} = "Text"; - } - $cell->{Val} = $cell->{_Value}; - } - $sheet->{Cells}[$row][$col] = $cell; - } - } - - $sheet->{MinRow} = 0 if $sheet->{MinRow} > $sheet->{MaxRow}; - $sheet->{MinCol} = 0 if $sheet->{MinCol} > $sheet->{MaxCol}; - - } - - return $self; -} - -# Convert cell name in the format AA1 to a row and column number. - -sub __decode_cell_name { - my ($letter1, $letter2, $digits) = @_; - - my $col = ord($letter1) - 65; - - if ($letter2) { - $col++; - $col *= 26; - $col += (ord($letter2) - 65); - } - - my $row = $digits - 1; - - return ($row, $col); -} - - -sub __load_shared_strings { - my ($zip, $converter) = @_; - - my $member_shared_strings = $zip->memberNamed('xl/sharedStrings.xml'); - - my @shared_strings = (); - - if ($member_shared_strings) { - - my $mstr = $member_shared_strings->contents; - $mstr =~ s/<t\/>/<t><\/t>/gsm; # this handles an empty t tag in the xml <t/> - foreach my $si ($mstr =~ /<si.*?>(.*?)<\/si/gsm) { - my $str; - foreach my $t ($si =~ /<t.*?>(.*?)<\/t/gsm) { - $t = $converter->convert($t) if defined $converter; - $str .= $t; - } - push @shared_strings, $str; - } - } - - return \@shared_strings; -} - - -sub __load_styles { - my ($zip) = @_; - - my $member_styles = $zip->memberNamed('xl/styles.xml'); - - my @styles = (); - my %style_info = (); - - if ($member_styles) { - my $formatter = Spreadsheet::XLSX::Fmt2007->new(); - - foreach my $t ($member_styles->contents =~ /xf\ numFmtId="([^"]*)"(?!.*\/cellStyleXfs)/gsm) { #" - push @styles, $t; - } - - my $default = $1 || ''; - - foreach my $t1 (@styles) { - $member_styles->contents =~ /numFmtId="$t1" formatCode="([^"]*)/; - my $formatCode = $1 || ''; - if ($formatCode eq $default || not($formatCode)) { - if ($t1 == 9 || $t1 == 10) { - $formatCode = '0.00000%'; - } elsif ($t1 == 14) { - $formatCode = 'yyyy-mm-dd'; - } else { - $formatCode = ''; - } -# $formatCode = $formatter->FmtStringDef($t1); - } - $style_info{$t1} = $formatCode; - $default = $1 || ''; - } - - } - return (\@styles, \%style_info); -} - - -sub __load_rels { - my ($zip) = @_; - - my $member_rels = $zip->memberNamed('xl/_rels/workbook.xml.rels') or die("xl/_rels/workbook.xml.rels not found in this zip\n"); - - my %rels = (); - - foreach ($member_rels->contents =~ /\<Relationship (.*?)\/?\>/g) { - - my ($id, $target); - ($id) = /Id="(.*?)"/; - ($target) = /Target="(.*?)"/; - - if (defined $id and defined $target) { - $rels{$id} = $target; - } - - } - - return \%rels; -} - -sub __load_zip { - my ($filename) = @_; - - my $zip = Archive::Zip->new(); - - if (ref $filename) { - $zip->readFromFileHandle($filename) == Archive::Zip::AZ_OK or die("Cannot open data as Zip archive"); - } else { - $zip->read($filename) == Archive::Zip::AZ_OK or die("Cannot open $filename as Zip archive"); - } - - return $zip; -} - - -1; -__END__ - -=head1 NAME - -Spreadsheet::XLSX - Perl extension for reading MS Excel 2007 files; - -=head1 SYNOPSIS - - use Text::Iconv; - my $converter = Text::Iconv->new("utf-8", "windows-1251"); - - # Text::Iconv is not really required. - # This can be any object with the convert method. Or nothing. - - use Spreadsheet::XLSX; - - my $excel = Spreadsheet::XLSX->new('test.xlsx', $converter); - - foreach my $sheet (@{$excel->{Worksheet}}) { - - printf("Sheet: %s\n", $sheet->{Name}); - - $sheet->{MaxRow} ||= $sheet->{MinRow}; - - foreach my $row ($sheet->{MinRow} .. $sheet->{MaxRow}) { - - $sheet->{MaxCol} ||= $sheet->{MinCol}; - - foreach my $col ($sheet->{MinCol} .. $sheet->{MaxCol}) { - - my $cell = $sheet->{Cells}[$row][$col]; - - if ($cell) { - printf("( %s , %s ) => %s\n", $row, $col, $cell->{Val}); - } - - } - - } - - } - -=head1 DESCRIPTION - -This module is a (quick and dirty) emulation of L<Spreadsheet::ParseExcel> for -Excel 2007 (.xlsx) file format. It supports styles and many of Excel's quirks, -but not all. It populates the classes from L<Spreadsheet::ParseExcel> for interoperability; -including Workbook, Worksheet, and Cell. - -=head1 SEE ALSO - -=over 2 - -=item L<Spreadsheet::ParseXLSX> - -This module has some serious issues with the way it uses regexs for parsing the XML. -I would strongly encourage switching to L<Spreadsheet::ParseXLSX> which takes a more reliable approach. - -=item L<Text::CSV_XS>, L<Text::CSV_PP> - -=item L<Spreadsheet::ParseExcel> - -=item L<Spreadsheet::ReadSXC> - -=item L<Spreadsheet::BasicRead> - -for xlscat likewise functionality (Excel only) - -=item Spreadsheet::ConvertAA - -for an alternative set of C<cell2cr()> / C<cr2cell()> pair - -=item L<Spreadsheet::Perl> - -offers a Pure Perl implementation of a -spreadsheet engine. Users that want this format to be supported in -L<Spreadsheet::Read> are hereby motivated to offer patches. It's not high -on my todo-list. - -=item xls2csv - -L<https://metacpan.org/release/KEN/xls2csv-1.07> offers an alternative for my C<xlscat -c>, -in the xls2csv tool, but this tool focusses on character encoding -transparency, and requires some other modules. - -=item L<Spreadsheet::Read> - -read the data from a spreadsheet (interface module) - -=back - -=head1 AUTHOR - -Dmitry Ovsyanko, E<lt>do@eludia.ruE<gt>, http://eludia.ru/wiki/ - -Patches by: - - Steve Simms - Joerg Meltzer - Loreyna Yeung - Rob Polocz - Gregor Herrmann - H.Merijn Brand - endacoe - Pat Mariani - Sergey Pushkin - -=head1 ACKNOWLEDGEMENTS - - Thanks to TrackVia Inc. (http://www.trackvia.com) for paying for Rob Polocz working time. - -=head1 COPYRIGHT AND LICENSE - -Copyright (C) 2008 by Dmitry Ovsyanko - -This library is free software; you can redistribute it and/or modify -it under the same terms as Perl itself, either Perl version 5.8.8 or, -at your option, any later version of Perl 5 you may have available. - -=cut +package Spreadsheet::XLSX;
+
+use 5.006000;
+use strict;
+use warnings;
+
+use base 'Spreadsheet::ParseExcel::Workbook';
+
+our $VERSION = '0.18';
+
+use Archive::Zip;
+use Spreadsheet::ParseExcel;
+use Spreadsheet::XLSX::Fmt2007;
+
+################################################################################
+
+sub new {
+ my ($class, $filename, $converter) = @_;
+
+ my %shared_info; # shared_strings, styles, style_info, rels, converter
+ $shared_info{converter} = $converter;
+
+ my $self = bless Spreadsheet::ParseExcel::Workbook->new(), $class;
+
+ my $zip = __load_zip($filename);
+
+ $shared_info{shared_strings}= __load_shared_strings($zip, $shared_info{converter});
+ my ($styles, $style_info) = __load_styles($zip);
+ $shared_info{styles} = $styles;
+ $shared_info{style_info} = $style_info;
+ $shared_info{rels} = __load_rels($zip);
+
+ $self->_load_workbook($zip, \%shared_info);
+
+ return $self;
+}
+
+sub _load_workbook {
+ my ($self, $zip, $shared_info) = @_;
+
+ my $member_workbook = $zip->memberNamed('xl/workbook.xml') or die("xl/workbook.xml not found in this zip\n");
+ $self->{SheetCount} = 0;
+ $self->{FmtClass} = Spreadsheet::XLSX::Fmt2007->new;
+ $self->{Flg1904} = 0;
+ if ($member_workbook->contents =~ /date1904="1"/) {
+ $self->{Flg1904} = 1;
+ }
+
+ foreach ($member_workbook->contents =~ /\<(.*?)\/?\>/g) {
+
+ /^(\w+)\s+/;
+
+ my ($tag, $other) = ($1, $');
+
+ my @pairs = split /\" /, $other;
+
+ $tag eq 'sheet' or next;
+
+ my $sheet = {
+ MaxRow => 0,
+ MaxCol => 0,
+ MinRow => 1000000,
+ MinCol => 1000000,
+ };
+
+ foreach ($other =~ /(\S+=".*?")/gsm) {
+
+ my ($k, $v) = split /=?"/; #"
+
+ if ($k eq 'name') {
+ $sheet->{Name} = $v;
+ $sheet->{Name} = $shared_info->{converter}->convert($sheet->{Name}) if defined $shared_info->{converter};
+ } elsif ($k eq 'r:id') {
+
+ $sheet->{path} = $shared_info->{rels}->{$v};
+
+ }
+
+ }
+
+ my $wsheet = Spreadsheet::ParseExcel::Worksheet->new(%$sheet);
+ $self->{Worksheet}[$self->{SheetCount}] = $wsheet;
+ $self->{SheetCount} += 1;
+
+ }
+
+
+ foreach my $sheet (@{$self->{Worksheet}}) {
+
+ my $member_sheet = $zip->memberNamed("xl/$sheet->{path}") or next;
+
+ my ($row, $col);
+
+ my $parsing_v_tag = 0;
+ my $s = 0;
+ my $s2 = 0;
+ my $sty = 0;
+ foreach ($member_sheet->contents =~ /(\<.*?\/?\>|.*?(?=\<))/g) {
+ if (/^\<c\s*.*?\s*r=\"([A-Z])([A-Z]?)(\d+)\"/) {
+
+ ($row, $col) = __decode_cell_name($1, $2, $3);
+
+ $s = m/t=\"s\"/ ? 1 : 0;
+ $s2 = m/t=\"str\"/ ? 1 : 0;
+ $sty = m/s="([0-9]+)"/ ? $1 : 0;
+
+ } elsif (/^<v>/) {
+ $parsing_v_tag = 1;
+ } elsif (/^<\/v>/) {
+ $parsing_v_tag = 0;
+ } elsif (length($_) && $parsing_v_tag) {
+ my $v = $s ? $shared_info->{shared_strings}->[$_] : $_;
+
+ if ($v eq "</c>") {
+ $v = "";
+ }
+ my $type = "Text";
+ my $thisstyle = "";
+
+ if (not($s) && not($s2)) {
+ $type = "Numeric";
+
+ if (defined $sty && defined $shared_info->{styles}->[$sty]) {
+ $thisstyle = $shared_info->{style_info}->{$shared_info->{styles}->[$sty]};
+ if ($thisstyle =~ /\b(mmm|m|d|yy|h|hh|mm|ss)\b/) {
+ $type = "Date";
+ }
+ }
+ }
+
+
+ $sheet->{MaxRow} = $row if $sheet->{MaxRow} < $row;
+ $sheet->{MaxCol} = $col if $sheet->{MaxCol} < $col;
+ $sheet->{MinRow} = $row if $sheet->{MinRow} > $row;
+ $sheet->{MinCol} = $col if $sheet->{MinCol} > $col;
+
+ if ($v =~ /(.*)E\-(.*)/gsm && $type eq "Numeric") {
+ $v = $1 / (10**$2); # this handles scientific notation for very small numbers
+ }
+
+ my $cell = Spreadsheet::ParseExcel::Cell->new(
+ Val => $v,
+ Format => $thisstyle,
+ Type => $type
+ );
+
+ $cell->{_Value} = $self->{FmtClass}->ValFmt($cell, $self);
+ if ($type eq "Date") {
+ if ($v < 1) { #then this is Excel time field
+ $cell->{Type} = "Text";
+ $cell->{Val} = $cell->{_Value};
+ }
+ }
+ $sheet->{Cells}[$row][$col] = $cell;
+ }
+ }
+
+ $sheet->{MinRow} = 0 if $sheet->{MinRow} > $sheet->{MaxRow};
+ $sheet->{MinCol} = 0 if $sheet->{MinCol} > $sheet->{MaxCol};
+
+ }
+
+ return $self;
+}
+
+# Convert cell name in the format AA1 to a row and column number.
+
+sub __decode_cell_name {
+ my ($letter1, $letter2, $digits) = @_;
+
+ my $col = ord($letter1) - 65;
+
+ if ($letter2) {
+ $col++;
+ $col *= 26;
+ $col += (ord($letter2) - 65);
+ }
+
+ my $row = $digits - 1;
+
+ return ($row, $col);
+}
+
+
+sub __load_shared_strings {
+ my ($zip, $converter) = @_;
+
+ my $member_shared_strings = $zip->memberNamed('xl/sharedStrings.xml');
+
+ my @shared_strings = ();
+
+ if ($member_shared_strings) {
+
+ my $mstr = $member_shared_strings->contents;
+ $mstr =~ s/<t\/>/<t><\/t>/gsm; # this handles an empty t tag in the xml <t/>
+ foreach my $si ($mstr =~ /<si.*?>(.*?)<\/si/gsm) {
+ my $str;
+ foreach my $t ($si =~ /<t.*?>(.*?)<\/t/gsm) {
+ $t = $converter->convert($t) if defined $converter;
+ $str .= $t;
+ }
+ push @shared_strings, $str;
+ }
+ }
+
+ return \@shared_strings;
+}
+
+
+sub __load_styles {
+ my ($zip) = @_;
+
+ my $member_styles = $zip->memberNamed('xl/styles.xml');
+
+ my @styles = ();
+ my %style_info = ();
+
+ if ($member_styles) {
+ my $formatter = Spreadsheet::XLSX::Fmt2007->new();
+
+ foreach my $t ($member_styles->contents =~ /xf\ numFmtId="([^"]*)"(?!.*\/cellStyleXfs)/gsm) { #"
+ push @styles, $t;
+ }
+
+ my $default = $1 || '';
+
+ foreach my $t1 (@styles) {
+ $member_styles->contents =~ /numFmtId="$t1" formatCode="([^"]*)/;
+ my $formatCode = $1 || '';
+ if ($formatCode eq $default || not($formatCode)) {
+ if ($t1 == 9 || $t1 == 10) {
+ $formatCode = '0.00000%';
+ } elsif ($t1 == 14) {
+ $formatCode = 'yyyy-mm-dd';
+ } else {
+ $formatCode = '';
+ }
+# $formatCode = $formatter->FmtStringDef($t1);
+ }
+ $style_info{$t1} = $formatCode;
+ $default = $1 || '';
+ }
+
+ }
+ return (\@styles, \%style_info);
+}
+
+
+sub __load_rels {
+ my ($zip) = @_;
+
+ my $member_rels = $zip->memberNamed('xl/_rels/workbook.xml.rels') or die("xl/_rels/workbook.xml.rels not found in this zip\n");
+
+ my %rels = ();
+
+ foreach ($member_rels->contents =~ /\<Relationship (.*?)\/?\>/g) {
+
+ my ($id, $target);
+ ($id) = /Id="(.*?)"/;
+ ($target) = /Target="(.*?)"/;
+
+ if (defined $id and defined $target) {
+ $rels{$id} = $target;
+ }
+
+ }
+
+ return \%rels;
+}
+
+sub __load_zip {
+ my ($filename) = @_;
+
+ my $zip = Archive::Zip->new();
+
+ if (ref $filename) {
+ $zip->readFromFileHandle($filename) == Archive::Zip::AZ_OK or die("Cannot open data as Zip archive");
+ } else {
+ $zip->read($filename) == Archive::Zip::AZ_OK or die("Cannot open $filename as Zip archive");
+ }
+
+ return $zip;
+}
+
+
+1;
+__END__
+
+=head1 NAME
+
+Spreadsheet::XLSX - Perl extension for reading MS Excel 2007 files.
+
+=head1 SYNOPSIS
+
+ use Text::Iconv;
+ my $converter = Text::Iconv->new("utf-8", "windows-1251");
+
+ # Text::Iconv is not really required.
+ # This can be any object with the convert method. Or nothing.
+
+ use Spreadsheet::XLSX;
+
+ my $excel = Spreadsheet::XLSX->new('test.xlsx', $converter);
+
+ foreach my $sheet (@{$excel->{Worksheet}}) {
+
+ printf("Sheet: %s\n", $sheet->{Name});
+
+ $sheet->{MaxRow} ||= $sheet->{MinRow};
+
+ foreach my $row ($sheet->{MinRow} .. $sheet->{MaxRow}) {
+
+ $sheet->{MaxCol} ||= $sheet->{MinCol};
+
+ foreach my $col ($sheet->{MinCol} .. $sheet->{MaxCol}) {
+
+ my $cell = $sheet->{Cells}[$row][$col];
+
+ if ($cell) {
+ printf("( %s , %s ) => %s\n", $row, $col, $cell->{Val});
+ }
+
+ }
+
+ }
+
+ }
+
+=head1 DESCRIPTION
+
+This module is a (quick and dirty) emulation of L<Spreadsheet::ParseExcel> for
+Excel 2007 (.xlsx) file format. It supports styles and many of Excel's quirks,
+but not all. It populates the classes from L<Spreadsheet::ParseExcel> for interoperability;
+including Workbook, Worksheet, and Cell.
+
+=head1 SEE ALSO
+
+=over 2
+
+=item L<Spreadsheet::ParseXLSX>
+
+This module (Spradsheet::XLSX) has some serious issues with the way it uses regexs for parsing the XML.
+I would strongly encourage switching to L<Spreadsheet::ParseXLSX> which takes a more reliable approach.
+
+=item L<Text::CSV_XS>, L<Text::CSV_PP>
+
+=item L<Spreadsheet::ParseExcel>
+
+=item L<Spreadsheet::ReadSXC>
+
+=item L<Spreadsheet::BasicRead>
+
+for xlscat likewise functionality (Excel only)
+
+=item Spreadsheet::ConvertAA
+
+for an alternative set of C<cell2cr()> / C<cr2cell()> pair
+
+=item L<Spreadsheet::Perl>
+
+offers a Pure Perl implementation of a
+spreadsheet engine. Users that want this format to be supported in
+L<Spreadsheet::Read> are hereby motivated to offer patches. It's not high
+on my todo-list.
+
+=item xls2csv
+
+L<https://metacpan.org/release/KEN/xls2csv-1.07> offers an alternative for my C<xlscat -c>,
+in the xls2csv tool, but this tool focusses on character encoding
+transparency, and requires some other modules.
+
+=item L<Spreadsheet::Read>
+
+read the data from a spreadsheet (interface module)
+
+=back
+
+=head1 AUTHOR
+
+Dmitry Ovsyanko, E<lt>do@eludia.ruE<gt>, http://eludia.ru/wiki/
+
+Patches by:
+
+ Steve Simms
+ Joerg Meltzer
+ Loreyna Yeung
+ Rob Polocz
+ Gregor Herrmann
+ H.Merijn Brand
+ endacoe
+ Pat Mariani
+ Sergey Pushkin
+
+=head1 ACKNOWLEDGEMENTS
+
+ Thanks to TrackVia Inc. (http://www.trackvia.com) for paying for Rob Polocz working time.
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2008 by Dmitry Ovsyanko
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.8.8 or,
+at your option, any later version of Perl 5 you may have available.
+
+=cut
diff --git a/lib/Spreadsheet/XLSX/Fmt2007.pm b/lib/Spreadsheet/XLSX/Fmt2007.pm index b4c9fba..4068e0f 100644 --- a/lib/Spreadsheet/XLSX/Fmt2007.pm +++ b/lib/Spreadsheet/XLSX/Fmt2007.pm @@ -1,14 +1,9 @@ -# This code is adapted for Excel 2007 from: -# Spreadsheet::XLSX::FmtDefault -# by Kawai, Takanori (Hippo2000) 2001.2.2 -# This Program is ALPHA version. -#============================================================================== package Spreadsheet::XLSX::Fmt2007; use strict; use warnings; use Spreadsheet::XLSX::Utility2007 qw(ExcelFmt); -our $VERSION = '0.17'; # +our $VERSION = '0.18'; # my %hFmtDefault = ( 0x00 => '@', @@ -52,7 +47,7 @@ my %hFmtDefault = ( ); #------------------------------------------------------------------------------ -# new (for Spreadsheet::XLSX::Utility2007) +# new (for Spreadsheet::XLSX::Fmt2007) #------------------------------------------------------------------------------ sub new { my ($sPkg, %hKey) = @_; @@ -62,7 +57,7 @@ sub new { } #------------------------------------------------------------------------------ -# TextFmt (for Spreadsheet::XLSX::Utility2007) +# TextFmt (for Spreadsheet::XLSX::Fmt2007) #------------------------------------------------------------------------------ sub TextFmt { my ($oThis, $sTxt, $sCode) = @_; @@ -71,7 +66,7 @@ sub TextFmt { } #------------------------------------------------------------------------------ -# FmtStringDef (for Spreadsheet::XLSX::Utility2007) +# FmtStringDef (for Spreadsheet::XLSX::Fmt2007) #------------------------------------------------------------------------------ sub FmtStringDef { my ($oThis, $iFmtIdx, $oBook, $rhFmt) = @_; @@ -85,7 +80,7 @@ sub FmtStringDef { } #------------------------------------------------------------------------------ -# FmtString (for Spreadsheet::XLSX::Utility2007) +# FmtString (for Spreadsheet::XLSX::Fmt2007) #------------------------------------------------------------------------------ sub FmtString { my ($oThis, $oCell, $oBook) = @_; @@ -122,7 +117,7 @@ sub FmtString { } #------------------------------------------------------------------------------ -# ValFmt (for Spreadsheet::XLSX::Utility2007) +# ValFmt (for Spreadsheet::XLSX::Fmt2007) #------------------------------------------------------------------------------ sub ValFmt { my ($oThis, $oCell, $oBook) = @_; @@ -140,7 +135,7 @@ sub ValFmt { } #------------------------------------------------------------------------------ -# ChkType (for Spreadsheet::XLSX::Utility2007) +# ChkType (for Spreadsheet::XLSX::Fmt2007) #------------------------------------------------------------------------------ sub ChkType { my ($oPkg, $iNumeric, $iFmtIdx) = @_; @@ -159,19 +154,39 @@ sub ChkType { __END__ +=head1 NAME + +Spreadsheet::XLSX::Fmt2007 - A class for Cell formats. + =head1 SYNOPSIS - $cell = $myworkbook->worksheet->{Cells}[$row][$col] - my $type = $cell->{Type}; # Date, Text, or Numeric - my $disp_value = $cell->Value; # displayed (formatted) value set in XLSX by $myFmt2007->ValFmt($cell, $workbook) - my $fund_value = $cell->{Val}; # fundemental (underlying) value - my $formatter; - if ($myworkbook->excel07) { - $formatter=Spreadsheet::XLSX::Fmt2007->new(); - } else { - $formatter=Spreadsheet::ParseExcel::FmtDefault->new(); - } - my $format_string = $formatter->FmtString($cell,$self->workbook); +See the documentation of L<Spreadsheet::XLSX>. + + my $cell = $myworkbook->worksheet->{Cells}[$row][$col] + my $type = $cell->{Type}; # Date, Text, or Numeric + my $disp_value = $cell->Value; # displayed (formatted) value set in XLSX by $myFmt2007->ValFmt($cell, $workbook) + my $fund_value = $cell->{Val}; # fundemental (underlying) value + my $formatter; + if( $myworkbook->excel07 ) { + $formatter = Spreadsheet::XLSX::Fmt2007->new(); + } else { + $formatter = Spreadsheet::ParseExcel::FmtDefault->new(); + } + my $format_string = $formatter->FmtString($cell, $self->workbook); + +=head1 DESCRIPTION + +This module is used in conjunction with L<Spreadsheet::XLSX>. See the documentation for L<Spreadsheet::XLSX>. + +This code is adapted for Excel 2007 from L<Spreadsheet::ParseExcel::FmtDefault> by Kawai, Takanori (Hippo2000) 2001-02-02. +This Program is ALPHA version. + +=head1 AUTHOR + +See the documentation for L<Spreadsheet::XLSX>. + +=head1 COPYRIGHT +See the documentation for L<Spreadsheet::XLSX>. =cut diff --git a/lib/Spreadsheet/XLSX/Utility2007.pm b/lib/Spreadsheet/XLSX/Utility2007.pm index bd9053f..1b1ed43 100644 --- a/lib/Spreadsheet/XLSX/Utility2007.pm +++ b/lib/Spreadsheet/XLSX/Utility2007.pm @@ -13,7 +13,7 @@ require Exporter; use vars qw(@ISA @EXPORT_OK); @ISA = qw(Exporter); @EXPORT_OK = qw(ExcelFmt LocaltimeExcel ExcelLocaltime col2int int2col sheetRef xls2csv); -our $VERSION = '0.17'; +our $VERSION = '0.18'; my $sNUMEXP = '(^[+-]?\d+(\.\d+)?$)|(^[+-]?\d+\.?(\d*)[eE][+-](\d+))$'; |