diff options
Diffstat (limited to 'lib/Parse/Distname.pm')
-rw-r--r-- | lib/Parse/Distname.pm | 406 |
1 files changed, 406 insertions, 0 deletions
diff --git a/lib/Parse/Distname.pm b/lib/Parse/Distname.pm new file mode 100644 index 0000000..6965009 --- /dev/null +++ b/lib/Parse/Distname.pm @@ -0,0 +1,406 @@ +package Parse::Distname; + +use strict; +use warnings; +use Carp; +use Exporter 5.57 'import'; + +our $VERSION = '0.05'; +our @EXPORT_OK = qw/parse_distname/; + +our $SUFFRE = qr/\.(?:tgz|tbz|tar[\._-]gz|tar\.bz2|tar\.Z|zip)$/; + +sub parse_distname { + my $distname = shift; + + my %res; + + # Stringify first, in case $distname is some kind of an object + my $path = "$distname"; + $res{arg} = $path; + + # Small path normalization + $path =~ s!\\!/!g; + $path =~ s!//+!/!g; + $path =~ s!/\./!/!g; + + $path =~ s!^(.*?/)?(?:authors/)?id/!!; + + # Get pause_id + my ($pause_id, $author_dir); + + # A/AU/AUTHOR/Dist-Version.ext + if ($path =~ s!^(([A-Z])/(\2[A-Z0-9])/(\3[A-Z0-9-]{0,7})/)!!) { + $author_dir = $1; + $pause_id = $4; + } + # AUTHOR/Dist-Version.ext as a handy shortcut (esp. for testing) + elsif ($path =~ s!^([A-Z][A-Z0-9][A-Z0-9-]{0,7})/!!) { + $pause_id = $1; + $author_dir = join '/', + substr($pause_id, 0, 1), + substr($pause_id, 0, 2), + $pause_id, + ""; + } + # A little backward incompatibility here (id/A/AU/AUTHOR etc) + # but I believe nobody cares. + else { + $pause_id = ""; + + # Assume it's a local distribution + $author_dir = "L/LO/LOCAL/"; + } + $res{pause_id} = $pause_id; + $res{cpan_path} = "$author_dir$path"; + + # Now the path should be (subdir/)dist-version + if ($path =~ s!^(.+/)!!) { + $res{subdir} = $1; + + # Typical Perl6 distributions are located under Perl6/ directory + $res{perl6} = 1 if $res{subdir} =~ m!^Perl6/!; + } + + # PAUSE allows only a few extensions ($PAUSE::dist::SUFFQR + zip) + $path =~ s/($SUFFRE)//i or return; + $res{extension} = $1; + + $res{name_and_version} = $path; + + # Parse dist-version + my $info = _parse_distv($path); + $res{$_} = $info->{$_} for keys %$info; + + return \%res; +} + +sub _parse_distv { + my $distv = shift; + + my %res; + + # Remove potential -withoutworldwriteables suffix + $distv =~ s/-withoutworldwriteables$//; + + my $trial; + # Remove TRIAL (PAUSE::dist::isa_dev_version seems to be + # a little too strict) + if ($distv =~ s/([_\-])(TRIAL(?:[0-9]*|[_.\-].+))$//) { + $trial = [$1, $2]; + } + + # Remove RC for perl as well + my $rc; + if ($distv =~ /^perl/ and $distv =~ s/\-(RC[0-9]*)$//) { + $rc = $1; + } + + my $version; + # Usually a version, which starts with a number (or a 'v'-number), + # is the last part of the name. + if ($distv =~ s/\-((?:[vV][0-9]|[0-9.])[^-]*)$//) { + $version = $1; + } + # However, there may be a trailing part. + elsif ($distv =~ s/\-((?:[vV][0-9]|[0-9.])(?![A-Z]).*?)$//) { + $version = $1; + + # Special case + if ($distv eq 'perl' and $version !~ /\./) { + $distv = "$distv-$version"; + $version = undef; + } + } + + # If the name still contains a dot between numbers, + # it's probably a part of the version. + if ($distv =~ s/([_\.-]?)([vV]?[0-9]*\.[0-9]+.*)$//) { + my $separator = $1 || ''; + $version = defined $version ? "$2-$version" : $2; + $version =~ s/^\.//; + + # Special case + if ($distv =~ s/_v$//) { + $version = "v$separator$version"; + } + } + + # If we still don't have a version and the name has a tailing number + # with a small-letter prefix (other than 'v') + if (!defined $version and $distv =~ s/\-([a-z]+[0-9][0-9_]*)$//) { + $version = $1; + } + + # If we still don't have a version, and the name doesn't have a hyphen, + # and it has a tailing number... (and an occasional alpha/beta marker) + # (and the number is not a part of a few proper names) + if (!defined $version and $distv !~ /\-(?:S3|MSWin32|OS2|(?:[A-Za-z][A-Za-z0-9_]*)?SSL3)$/i and $distv =~ s/([_\.]?)([vV]?[0-9_]+[ab]?)$//) { + my $separator = $1; + $version = $2; + + # Special case + if (!$separator and $distv =~ s/_([a-z])$//) { + $version = "$1$version"; + } + } + + # Special case that should be put at the end + if (!defined $version and $distv =~ s/\-undef$//) { + $version = undef; + } + + my $dist = $distv; + + my $dev; + if ($dist eq 'perl') { + if ($version =~ /\d\.(\d+)(?:\D(\d+))?/) { + $dev = 1 if ($1 > 6 and $1 & 1) or ($2 and $2 >= 50); + } + if ($rc) { + $version = "$version-$rc"; + $dev = 1; + } + } + elsif (($version and $version =~ /\d\.\d+_\d/) or $trial) { + $dev = 1; + } + + if ($trial) { + $version = defined $version ? "$version$trial->[0]$trial->[1]" : $trial->[1]; + $dev = 1; + } + + # Normalize the Dist.pm-1.23 convention which CGI.pm and + # a few others use. + $dist =~ s/\.pm$//; + + # Remove apparent remnants that can't be a part of a package name + $dist =~ s/[\-\.]+$//; + + my $version_number; + if (defined $version) { + if ($version =~ /^([vV]?[0-9._]+)(?:\-|$)/) { + $version_number = $1; + $version_number =~ s/[\._]+$//; + } + } + + return { + name => $dist, + version => $version, + version_number => $version_number, + is_dev => $dev, + }; +} + +# for compatibility with CPAN::DistnameInfo + +sub new { + my ($class, $distname) = @_; + my $info = parse_distname($distname) || {}; + bless $info, $class; +} + +sub distname_info { + my $distname = shift; + my $info = parse_distname($distname); + @$info{qw/name version is_dev/}; +} + +sub dist { shift->{name} } +sub version { shift->{version} } +sub maturity { shift->{is_dev} ? 'developer' : 'released' } +sub filename { + my $self = shift; + join "", grep defined $_, @$self{qw/subdir name_and_version extension/}; +} +sub cpanid { shift->{pause_id} } +sub distvname { shift->{name_and_version} } +sub extension { substr(shift->{extension}, 1) } +sub pathname { shift->{arg} } + +sub properties { + my $self = shift; + my @methods = qw/ + dist version maturity filename + cpanid distvname extension pathname + /; + my %properties; + for my $method (@methods) { + $properties{$method} = $self->$method; + } + %properties; +} + +# extra accessors + +sub is_perl6 { shift->{perl6} } +sub version_number { shift->{version_number} } + +1; + +__END__ + +=encoding utf-8 + +=head1 NAME + +Parse::Distname - parse a distribution name + +=head1 SYNOPSIS + + use Parse::Distname 'parse_distname'; + my $info = parse_distname('ISHIGAKI/Parse-Distname-0.01.tar.gz'); + + # for compatibility with CPAN::DistnameInfo + my $info_obj = Parse::Distname->new('ISHIGAKI/Parse-Distname-0.01.tar.gz'); + say $info_obj->dist; # Parse-Distname + +=head1 DESCRIPTION + +Parse::Distname is yet another distribution name parser. It works +almost the same as L<CPAN::DistnameInfo>, but Parse::Distname takes +a different approach. It tries to extract a version part of a +distribution and treat the rest as a distribution name, contrary to +CPAN::DistnameInfo which tries to define a name part and treat +the rest as a version. + +Because of this difference, when Parse::Distname parses a weird +distribution name such as "AUTHOR/v1.0.tar.gz", it says the name +is empty and the version is "v1.0", while CPAN::DistnameInfo +says the name is "v" and the version is "1.0". See test files +in this distribution if you need more details. As of this writing, +Parse::Distname returns a different result for about 200+ +distributions among about 320000 BackPan distributions. + +=head1 FUNCTION + +Parse::Distname exports one function C<parse_distname> if requested. +It returns a hash reference, with the following keys as of this +writing: + +=over 4 + +=item arg + +The path you passed to the function. If what you passed is some kind +of an object (of Path::Tiny, for example), it's stringified. + +=item cpan_path + +A relative path to the distribution, whose base directory is +assumed CPAN/authors/id/. If org_path doesn't contain a pause_id, +the distribution is assumed to belong to LOCAL user. For example, + + say parse_distname('Dist-0.01.tar.gz')->{cpan_path}; + # L/LO/LOCAL/Dist-0.01.tar.gz + +If you only gives a pause_id, parent directories are supplemented. + + say parse_distname('ISHIGAKI/Dist-0.01.tar.gz')->{cpan_path}; + # I/IS/ISHIGAKI/Dist-0.01.tar.gz + +=item pause_id + +The pause_id of the distribution. Contrary to the above, this is +empty if you don't give a pause_id. + + say parse_distname('Dist-0.01.tar.gz')->{pause_id}; + # (undef, not LOCAL) + +=item subdir + +A PAUSE distribution may be put into a subdirectory under the author +directory. If the name contains such a subdirectory, it's kept here. + + say parse_distname('AUTHOR/sub/Dist-0.01.tar.gz')->{subdir}; + # sub + +Perl 6 distributions are (almost) always put under Perl6/ +subdirectory under each author's directory (with a few exceptions). + +=item name_and_version + +The name and version of the distribution, without an extension and +directory parts, which should not be empty as long as the +distribution has an extension that PAUSE accepts. + + say parse_distname('AUTHOR/sub/Dist-0.01.tar.gz')->{name_and_version}; + # Dist-0.01 + +=item name + +The name part of the distribution. This may be empty if no valid +name is found + + say parse_distname('AUTHOR/sub/Dist-0.01.tar.gz')->{name}; + # Dist + + say parse_distname('AUTHOR/v0.1.tar.gz')->{name}; + # (empty) + +=item version + +The version part of the distribution. This also may be empty, and +this may not always be a valid version, and may have a following +part such as C<-TRIAL>. + + say parse_distname('AUTHOR/Dist.tar.gz')->{version}; + # (undef) + + say parse_distname('AUTHOR/Dist-0.01-TRIAL.tar.gz')->{version}; + # 0.01-TRIAL + +=item version_number + +The first numerical part of the version. This also may be empty, and +this may not always be a valid version. + + say parse_distname('AUTHOR/Dist-0.01-TRIAL.tar.gz')->{version_number}; + # 0.01 + + say parse_distname('AUTHOR/Dist-0_0_1.tar.gz')->{version_number}; + # 0_0_1 + +=item extension + +The extension of the distribution. If no valid extension is found, +parse_distname returns false (undef). + +=item is_perl6 + +For convenience, if subdir exists and it starts with Perl6/, +this becomes true. + +=item is_dev + +If the version looks like C<\d+.\d+_\d+>, or contains C<-TRIAL>, +this becomes true. PAUSE treats such a distribution as a developer's +release and doesn't list it in its indices. + +=back + +=head1 METHODS + +For compatibility with CPAN::DistnameInfo, Parse::Distname has the +same methods/accessors, so you can use it as a drop-in replacement. + +In addition, C<is_perl6> and C<version_number> are available. + +=head1 SEE ALSO + +L<CPAN::DistnameInfo> + +=head1 AUTHOR + +Kenichi Ishigaki, E<lt>ishigaki@cpan.orgE<gt> + +=head1 COPYRIGHT AND LICENSE + +This software is copyright (c) 2018 by Kenichi Ishigaki. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +=cut |