Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Safe Merge module

0 views
Skip to first unread message

robic0

unread,
Mar 31, 2006, 7:53:59 PM3/31/06
to
I decided to post this module here, Smg.pm.
Smg.pm is first, then a trailing 'pl' or two for usage.
I wrote this after 2 months of Perl knowledge, and it shows.
It also shows a somewhat a past of large scale programming.
I originally wrote this to be used in conjunction with a
screen scraper I was going to write, that crawl's and strip all
images/mpeg's/movies et all, from porno site's.
With that I was going to start several porno sites with the
material. If you wan't to know more on this subject (how I can
legally do this) just ask.

Anyway, it only took me a week to write and stabalize.
Its thoroughly debugged, no errors whatsoever. Be carefull
that you fully understand the arcane code before you modify
it. The code is extremely powerfull, again, be carefull!

I will not be responsible in any way, if this code crashes your
hard disk, use at your own risk!!!!

I would be interested to know if you modify it, and what it
did for you.

Remember folks, this was designed to handle hardcore porno menutia,
not for anything else!

Set up two directories, use mrg_test1.pl (this uses time() for prefix),
then make a shortcut to it and put it into your taskbar.
Touch the taskbar when necessary during browsing.

I had envisioned multiple tools to handle auto and semi-auto porno
extraction and handling. This was the first one. Let me know if
you have any ideas for a tool bag that can be sold to the freaks
out there!!!

robic0
Lord of Porn

=====================================================================
=====================================================================
=====================================================================
smg.pm
-----------

package SMG;
use strict;

use File::stat;
use File::Path;
use File::Copy;
use File::Find;
use File::Spec;
use sort 'stable';


require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(SafeMerge GetCommonElementsNxM GetCommonElements);
my $VERSION = 1.00;

#my $current = sort::current();

####################################################################################
# SAFE MERGE
# The two major options -
# 1. Merge 2 directories into 1, preserving/renaming duplicate named files.
# 2. Compare 2 directories and just rename the duplicates of 1 directory.
# Also -
# - The prefix level will be on a per-file monitored basis.
# - File/Dir names are cached and renamed in the "From" array,
# - Move is done via os-rename function into the "ToDir".
# - Remove "From" directory option (after rename).
# - Duplicate's-renaming-only is done in-place in the "FromDir".
# - Report output is created in the current directory (for now).
# - Itteration scheme are within-directory, then cross-directory until no more dups.
#-----------------------------------------------------------------------------------
sub SafeMerge ($$$$$$$)
{
return 0 if (@_ < 2 || @_ > 7);
my ($ToDir,$FromDir,$PrefixName,$PrefixLevel,$Exclude,$Duponly,$RmvFrom) = @_;

$PrefixName = '' unless defined $PrefixName; # Renaming Prefix
$PrefixLevel = 0 unless defined $PrefixLevel; # Number of times to += prefix

$Exclude = [] unless defined $Exclude; # Exclude list (must be re)
$Duponly = 0 unless defined $Duponly; # Flag to just rename dups in "from"
$RmvFrom = 0 unless defined $RmvFrom; # Flag to remove "from" after operation

my @To = ();
my @From = ();
my @ndx = ();
my @Xfound = ();
my $dupsize = @ndx;
my $passes = 0;

print "\n\n *******************************\n";
print "********[ Safe Merge ]*********\n";
print " *******************************\n";
print "\nPrefix\t$PrefixName\nLevel\t$PrefixLevel\nChecks ... ";

$ToDir = File::Spec->canonpath( "$ToDir" );
$FromDir = File::Spec->canonpath( "$FromDir" );
#print "$ToDir\n$FromDir";
if ($ToDir eq $FromDir) { print "\rFrom/To are identical: $ToDir\n"; return 0; }
if (!-e $ToDir) { print "\rTo directory does not exist: $ToDir\n"; return 0; }
if (!-e $FromDir) { print "\rFrom directory does not exist: $FromDir\n"; return 0;}

# Glob seems to be bothered by spaces in dir path names
my $Sep = "\\".File::Spec->canonpath( "/" );
{
my $td = File::Spec->canonpath( "$ToDir/*" );
my $fd = File::Spec->canonpath( "$FromDir/*" );
if ($td =~ / /) {
$td = "'$td'";
}
if ($fd =~ / /) {
$fd = "'$fd'";
}
foreach (glob ("$td")) {
/.+$Sep(.+)$/;
# /.+\/(.+)$/;
push (@To, [$1, 0]);
}
foreach (glob ("$fd")) {
/.+$Sep(.+)$/;
# /.+\/(.+)$/;
my $ftmp = $1;
for (@{$Exclude}) {
if ($ftmp =~ /$_/) {
# save in @Xfound if $RmvFrom is set (see below)
print "\rExclude - $ftmp\n";
push (@Xfound, $ftmp) if ($RmvFrom);
$ftmp = undef;
last;
}
}
push (@From, [$ftmp, 0, '']) if defined ($ftmp);
}
}

# Will return if "From" empty. Technically not an error.
# If it is empty through exclusions (regexp filter), "From" could be rmdir'd below.
if (@From == 0) { print "\rEmpty dir: $FromDir\n"; return 0; }
print "OK\n";


###**************************************************************************
## Iterate until there are no more dups across directories after (D2) rename
#
do {
@ndx = GetCommonElementsNxM(\@To,\@From, 1, 0);
$dupsize = @ndx;
$passes++;

print "\nP A S S \# $passes\n---------------------\n";
print "Found $dupsize dups:\n";

#**********************************
# Check for cross-directory dups
# -------------
if ($dupsize)
{
my $errflg = 0;
print "\n<DIR> $FromDir\n\n";
for (my $i = 0; $i < $dupsize; $i++)
{
my $Fromref = $From[$ndx[$i]];
my $pname = $PrefixName;

print "\t$Fromref->[0] ... $PrefixName";

#*****************************************
# Rename and check for in-directory dups
# ----------
while (1)
{
# Level check
if ($Fromref->[1] >= $PrefixLevel)
{
print "$Fromref->[0] <<level $Fromref->[1] exceeded>>\n";
# Clear the name in From so this file doesn't get moved
$Fromref->[0] = "";
$errflg = 1;
last;
}
my $stmp = uc($pname.$Fromref->[0]);
my $found = 0;
for (@From) { if (uc($_->[0]) eq $stmp) { $found = 1; last; } }

if (!$found) {
# OK to rename! Dup not found in "From"
# Change the name in "From" to this new one
# so it gets moved later. Increment its level of prefix
# ---------------------
print "$Fromref->[0]";
print "\n";
if ($Fromref->[1] == 0) { $Fromref->[2] = $Fromref->[0]; }
$Fromref->[0] = $pname.$Fromref->[0];
$Fromref->[1] += 1;
last;
}
else {
# File exist with that name, add another level of prefix.
# ---------------------
print "$PrefixName";
$pname = $pname.$PrefixName;
if ($Fromref->[1] == 0) { $Fromref->[2] = $Fromref->[0]; }
$Fromref->[1] += 1;
}
}
}
if ($errflg) {
print "!!! Can't rename some files, raise the level or change the prefix.\n";
#return 0;
}
}
} while ($dupsize > 0);
#
## End cross-dir iteration
###

#*************************************************************
# Check for rename of duplicates in D2 (only) without moving
# ------------
if ($Duponly)
{
my $rentried = 0;
my $renok = 0;
print "\nRename only ... ";
for (@From) {
my $res = 0;
if ($_->[1] > 0 && $_->[1] < $PrefixLevel) {
$rentried++;
my $fr = File::Spec->canonpath( "$FromDir/$_->[2]" );
my $fr2 = File::Spec->canonpath( "$FromDir/$_->[0]" );
if (!($res = rename ("$fr", "$fr2"))) {
print "Rename error:\t$_->[2] ... $_->[0]\n";
} else {
$renok++; }
}
}
if ($rentried) { print "$renok out of $rentried files OK\n"; }
else { print "No duplicates found\n"; }
return 1;
}

#**********************
# Move "From" into "To"
# -------------------
#print @From."@From\n";
my $sep = File::Spec->canonpath( "/" );
print "\nMoving:\t$FromDir$sep* to $ToDir$sep\n";
my $movcnt = 0;

for (@From) {
my $res = 0;
if ($_->[1] > 0 && $_->[1] < $PrefixLevel) {
my $fr = File::Spec->canonpath( "$FromDir/$_->[2]" );
my $to = File::Spec->canonpath( "$ToDir/$_->[0]" );
$res = File::Copy::move ("$fr", "$to");
if ($res == 0) { print "Move error:\t$_->[2] ... $to\n"; }
}
if ($_->[1] == 0 && $_->[0] gt '') {
my $fr = File::Spec->canonpath( "$FromDir/$_->[0]" );
my $to = File::Spec->canonpath( "$ToDir/$_->[0]" );
$res = File::Copy::move ("$fr", "$to");
if ($res == 0) { print "Move error:\t$fr\n" }
}
$movcnt++ if ($res);
}
my $notmoved = @From - $movcnt;
print "Moved:\t$movcnt out of ".@From." files.\n";

#***************************************
# Check if "FromDir" is to be deleted
# --------------
if ($RmvFrom) {
print "Remove:\t$FromDir ... ";
if ($notmoved > 0) {
print "not deleted, contains $notmoved file(s) that couldn't be moved!\n";
}
else {
# check if @Xfound has values (and their not directories)
# if so, delete these first before trying to remove the From directory
# for now, don't want to "unlink" a directory, and we're not doing a tree here
# ----------------
for (@Xfound) {
my $fr = File::Spec->canonpath( "$FromDir/$_" );
unlink ("$fr") if (!-d $fr) ; }
my $fr = File::Spec->canonpath( "$FromDir" );
if (!rmdir "$fr") {print "$!\n" } else {print "OK\n"; }
}
}
return 1;
}


#######################################################
# Get Common Elements (from two N-dimensioned Array's)
# IN - Refs to the NxN arrays to compare,
# sort flag and the compare field.
# OUT - Ndx's into Right_Array of matching elements
# ---------------------------------------------------
# Notes -
# 1. Elements are assumed textual and case insensitive
# 2. Ignores in-array duplicates
# 3. Sort will be done if sort flag > 0
#
sub GetCommonElementsNxM($$$$)
{
my ($A_Left,$A_Right,$Srtflg,$Fld) = @_;
$Srtflg = 0 unless defined $Srtflg;
$Fld = 0 unless defined $Fld;
# my @Dup = ();
my @Ndx = ();

if ($Srtflg > 0) {
@{$A_Left} = sort {uc($a->[$Fld]) cmp uc($b->[$Fld])} @{$A_Left};
@{$A_Right} = sort {uc($a->[$Fld]) cmp uc($b->[$Fld])} @{$A_Right};
} else {print "==> Common Elements : Not sorting arrays\n";}

my $rpos = 0;
my $rend = @{$A_Right};
my $cnt = 0;
my $llast = undef;
my $rlast = undef;
foreach my $left_element (@{$A_Left})
{
next if (uc($left_element->[$Fld]) eq uc($llast->[$Fld]));

$rpos += $cnt;
$cnt = 0;
foreach my $right_element (@{$A_Right}[$rpos..($rend-1)])
{
last if (uc($left_element->[$Fld]) lt uc($right_element->[$Fld]));
$cnt++;
next if (uc($right_element->[$Fld]) eq uc($rlast->[$Fld]));
if (uc($left_element->[$Fld]) eq uc($right_element->[$Fld]))
{
# push (@Dup, $right_element->[$Fld]); # the string
push (@Ndx, $rpos+$cnt-1); # the index into R_Array
last;
}
$rlast = $right_element;
}
$llast = $left_element;
last if ($rpos >= $rend);
}
# return (@Dup);
return (@Ndx);
}


#######################################################
# Get Common Elements from single Array's
# IN - Refs to the Nx1 arrays to compare, sort flag
# OUT - Ndx's into Right_Array of matching elements
# ---------------------------------------------------
# Notes -
# 1. Elements are assumed textual and case insensitive
# 2. Ignores in-array duplicates
# 3. Sort will be done if sort flag > 0
#######################################################
sub GetCommonElements($$$)
{
my ($A_Left,$A_Right,$Srtflg) = @_;
$Srtflg = 0 unless defined $Srtflg;
# my @Dup = ();
my @Ndx = ();

if ($Srtflg > 0) {
@{$A_Left} = sort {uc($a) cmp uc($b)} @{$A_Left};
@{$A_Right} = sort {uc($a) cmp uc($b)} @{$A_Right};
} else {print "==> Common Elements : Not sorting arrays\n";}

my $rpos = 0;
my $rend = @{$A_Right};
my $cnt = 0;
my $llast = '';
my $rlast = '';
foreach my $left_element (@{$A_Left})
{
next if (uc($left_element) eq uc($llast));

$rpos += $cnt;
$cnt = 0;
foreach my $right_element (@{$A_Right}[$rpos..($rend-1)])
{
last if (uc($left_element) lt uc($right_element));
$cnt++;
next if (uc($right_element) eq uc($rlast));
if (uc($left_element) eq uc($right_element))
{
# push (@Dup, $right_element); # the string
push (@Ndx, $rpos+$cnt-1); # the index into R_Array
last;
}
$rlast = $right_element;
}
$llast = $left_element;
last if ($rpos >= $rend);
}
# return (@Dup);
return (@Ndx);
}

=====================================================================
=====================================================================
=====================================================================
mrg_test1.pl
--------------
use strict;
require SMG;

use File::stat;
use File::Spec;
use File::Path;
use File::Copy;
use File::Find;
use sort 'stable';

my $VERSION = 1.00;

my $current = sort::current();
#print "\n==> sort : $current\n\n";

# Test smerge alone
#-----------------------
if (1)
{
#
my $ToDir = 'D:\agent\t1';
my $FromDir = 'D:\agent\t2';
my $PrefixName = time()."_";
my $PrefixLevel = 5;
my @Exclude = ('\.jbf$', '\.alb$'); # if all goes ok, these will be rmdir'd with the from, but not moved
my $Duponly = 0;
my $RmvFrom = 0;

if (SMG::SafeMerge ($ToDir,$FromDir,$PrefixName,$PrefixLevel,\@Exclude,$Duponly,$RmvFrom)) {
print "\nSafe Merge exited ok.\n";
} else { print "\nSafe Merge had a headache.\n"; }

}

=====================================================================
=====================================================================
=====================================================================
mrg_test.pl
-------------

use strict;
require SMG;

use File::stat;
use File::Spec;
use File::Path;
use File::Copy;
use File::Find;
use sort 'stable';

my $VERSION = 1.00;

my $current = sort::current();
#print "\n==> sort : $current\n\n";

# Batch create some directories
# (just hit return to go to next thing)
#------------------------------------------
if (1)
{
print "\n\n** B A T C H C R E A T E D I R E C T O T I E S **\n\n";
while (1)
{
print "> Enter path/seed and start/end number ( path/seed , # , # )\n? ";
my $Sep = "\\".File::Spec->canonpath( "/" );
my $input = '';
chomp ($input = <>);

# no time for error check, input better be right or else
#
my @inp = split (',', $input, 3);
if (@inp == 3)
{
$inp[0] =~ s/^[ ]+//; $inp[0] =~ s/[ ]+$//; # remove leading/trailing spaces
$inp[0] = File::Spec->canonpath( $inp[0] );
# get the seed
$inp[0] =~ /.+$Sep(.+)$/;
my $seed = $1;
$inp[1] =~ s/^[ ]+//; $inp[1] =~ s/[ ]+$//;
$inp[2] =~ s/^[ ]+//; $inp[2] =~ s/[ ]+$//;
print "$seed, $inp[1], $inp[2]\nPress return to confirm ... "; <>;
for (my $i = $inp[1]; $i <= $inp[2]; $i++)
{
my $dir = "$inp[0]$i";
my $res = mkdir ("$dir");
print "$res\t$dir\n";
}
}
last if (@inp == 0);
}
print "\n\n";
}

# Test smerge
#--------------------------------------------------------------------------------
if (1)
{
#*******************************************
# Defaults
# -------------------------------------
my $PrefixName = "use_time";
my $PrefixLevel = 8;
my @PNL = ($PrefixName, $PrefixLevel);
# exclusions will be unlinked (not moved) if "from" deleted
my @Exclude = ('\.jbf', '\.alb');
my $Duponly = 0; # flag - nothing moved, duplicates renamed in "from" dir
my $RmvFrom = 1; # flag - "from" directory deleted after move
#*******************************************

print "\n** S M E R G **\n\n";
my $Sep = "\\".File::Spec->canonpath( "/" );
# Container level D or S
my $Container_level = '';
my $inp = '';
while (1) {
print "> S)ingle or D)ual container ? ";
chomp ($inp = <>);
$Container_level = uc ($inp);
last if ($Container_level eq 'S' || $Container_level eq 'D');
if ($Container_level eq '') {
print "\ndone!\n";
<>;exit;
}
}
# Get the outer container directory
my $Main_dir = '';
while (1) {
print "> Full path ? ";
chomp ($inp = <>);
#$inp =~ s/^[ ]*(.*[^ ])[ ]*$/$1/; # remove leading/trailing spaces
$inp =~ s/^[ ]+//; $inp =~ s/[ ]+$//;

last if (-d ($Main_dir = $inp));
print "\t$Main_dir not found\n";
}

# Clean up directory name
$Main_dir = File::Spec->canonpath( $Main_dir ) ;

# Get BIG's sub-dirs
my @Cbig = ();
if ($Container_level eq 'D') {
print "\nGetting outer level directories ...\n";
my $md = File::Spec->canonpath( "$Main_dir/*" );
if ($md =~ / /) { $md = "'$md'"; }
foreach (glob ("$md")) {
#/.+$Sep(.+)$/;
push (@Cbig, $_) if (-d $_);
}
}
else {push (@Cbig, $Main_dir);}

###**********************************
## Outer level Loop
#
my ($cnt,$res);
for (@Cbig) {
my $bdir = $_;
print "\n> Process -> $bdir\n? ";
chomp ($res = <>);
next if (uc($res) ne 'Y');

###**************************************
## Inner level loop (until user done)
# Allow only 2 selections at a time
# show refreshed dir list each loop
#
while (1)
{
my @Csmall = ();
my $bd = File::Spec->canonpath( "$bdir/*" );
#print "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n";
#print "$bdir\n";
if ($bd =~ / /) { $bd = "'$bd'"; }
foreach (glob ("$bd"))
{
if (-d $_) {
/.+$Sep(.+)$/;
my $sdir = $1;
my $sdfiles = 0;
# get the number of files in sd
my $sd = File::Spec->canonpath( "$bdir/$sdir/*" );
#print "$sd\n";
if ($sd =~ / /) { $sd = "'$sd'"; }
foreach (glob ("$sd")) { $sdfiles++; }
push (@Csmall, [$sdir, $sdfiles]);
#print "$_\n";
}
}
GetPairs:
# display the directory name without path
# and the number of files in it
print "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n";
print "$bdir\n";
$cnt = 1;
for (@Csmall) {
printf ("%2d [ %6d ] %s\n", $cnt, $_->[1], $_->[0]);
$cnt++;
}
if (@Csmall < 2) { # need 2 to merge
print "Only 1 directory ...\n";
last;
}
# Get 2 dirs
# ----------
my @pair = ();
while (1) {
print "\n> Which pair to/from ( # # ) ? ";
chomp ($res = <>);
@pair = split (' ', $res, 2);
last if (@pair == 0 ||
(@pair == 2 &&
$pair[0] > 0 && $pair[0] <= @Csmall &&
$pair[1] > 0 && $pair[1] <= @Csmall &&
$pair[0] != $pair[1]));
print "input error: @pair\n";
}
last if (@pair == 0);

# here we have the two directories
# call smrg here, then rescan this dir
# until user is done.

my $ToDir = File::Spec->canonpath( "$bdir/$Csmall[$pair[0]-1]->[0]" );
my $FromDir = File::Spec->canonpath( "$bdir/$Csmall[$pair[1]-1]->[0]" );

print "To:\t$ToDir\n";
print "From:\t$FromDir\n";

while (1) {
print "- - - - - - - - -- - - - - - - - -\n";
print "S)elect a different to/from pair\n";
print "P)refix Name (or 'use_time') and Level:\t\t$PNL[0] , $PNL[1]\n";
print "E)xclusions (regexp):\t\t";
$cnt = 1;
if (@Exclude) {
for (@Exclude) { print "$_"; print " , " if $cnt < @Exclude; $cnt++ }
} else {
print "example \\\.extension\$"; }
print "\n";
print "R)ename duplicates only:\t$Duponly\n"; # toggle
print "D)elete From directory:\t\t$RmvFrom\n"; # toggle
print "> S P E R D or return to accept ? ";
chomp ($res = <>);
last if ($res eq '');
$res = uc($res);
goto GetPairs if ($res eq 'S');
$Duponly = 1-$Duponly if ($res eq 'R');
$RmvFrom = 1-$RmvFrom if ($res eq 'D');
if ($res eq 'P') {
### Get Prefix name and level
my @pnl = ();
my $rs;
while (1) {
print "> ( $PNL[0] , $PNL[1] ) ? ";
chomp ($rs = <>);
@pnl = split (',', $rs, 2);
last if (@pnl == 0);
if (@pnl == 2) {
### remove leading/trailing spaces
$pnl[0] =~ s/^[ ]+//; $pnl[0] =~ s/[ ]+$//;
$pnl[1] =~ s/^[ ]+//; $pnl[1] =~ s/[ ]+$//;
if ($pnl[0] gt '' && $pnl[1] gt '') {
@PNL = @pnl[0..1];
last;
}
}
print "input error: @pnl\n";
}
}
if ($res eq 'E') {
### Get Exclusions
my @exclude = ();
my $rs;
print "> ( ";
$cnt = 1;
for (@Exclude) { print "$_"; print " , " if $cnt < @Exclude; $cnt++ }
print " ) ? ";
chomp ($rs = <>);
@exclude = split (',', $rs);
### remove leading/trailing spaces
for (my $i = 0; $i < @exclude; $i++) {
### remove leading/trailing spaces
$exclude[$i] =~ s/^[ ]+//; $exclude[$i] =~ s/[ ]+$//; }
if (@exclude) { @Exclude = @exclude[0..(@exclude-1)]; }
}
}
my $prefn = '';
$PrefixName = $prefn = $PNL[0];
$PrefixLevel = $PNL[1];
$prefn = time()."_" if (lc(@PNL[0]) eq "use_time");

if (SMG::SafeMerge ( $ToDir,
$FromDir,
$prefn,
$PrefixLevel,
\@Exclude,
$Duponly,
$RmvFrom))
{print "\nSafe Merge returned OK.\n";}
else {print "\nSafe Merge had a headache.\n";}
print "Press return to continue ...\n"; $res = <>;
}
}
print "done!\n";
<>;exit;
}

robic0 - porno god

0 new messages