#!/usr/local/bin/perl
#
# qancord.pl
# Clifton Pye
#
# This program produces a lexical concordance for a unicode text file
# exported from Elan via Excel.
# The exported text file should have a header row and be saved in the
# UTF-8 format with the .txt extension.
#
# Usage: run the script and type the input file name WITHOUT its extension.
# For an input file NAME.txt the concordance is written to NAME.con.
#
# Input layout (tab-separated; the header row decides which case applies):
#   4 or more columns - words are taken from the third column and the
#                       first four columns are echoed for every hit;
#   fewer columns     - words are taken from the second column and the
#                       first three columns are echoed for every hit.
#
# Output layout: each distinct lower-cased word, in sorted order, followed
# by every input line in which it occurs (one entry per occurrence).

use strict;
use warnings;

my $extension = ".txt";
my $concord   = "con";

print "What is the file you wish to analyze? \n";
print "Type the filename without an extension \n";
print "The extension should be txt \n\n";

# Read the filename from the keyboard input.
my $textfile = <>;
die "No filename was given\n" unless defined $textfile;

# Strip only the line ending; chop would eat a real character whenever the
# input ends without a newline.
$textfile =~ s/\r?\n\z//;
die "No filename was given\n" if $textfile eq '';

my $file = $textfile . $extension;
print "Now analyzing $textfile\n\n";

# Decode as UTF-8 so that lc() folds non-ASCII letters as well.
open my $text_in, '<:encoding(UTF-8)', $file
    or die "Could not open $file: $!";

# The header row is used only to count the number of tiers.
my $header = <$text_in>;
$header = '' unless defined $header;    # empty file => empty concordance
$header =~ s/\r?\n\z//;
my @header_tiers = split /\t/, $header;

# $word_col : index of the tier whose words are indexed
# $last_col : index of the last tier echoed in each concordance entry
my ( $word_col, $last_col ) = @header_tiers >= 4 ? ( 2, 3 ) : ( 1, 2 );

# Maps each word to an array of the lines that contain it.  An array is
# used instead of a '#'-joined string so that a '#' inside the transcript
# cannot break an entry apart.
my %lines_for;

while ( my $line = <$text_in> ) {
    $line =~ s/\r?\n\z//;               # Remove line returns (LF or CRLF)

    # Split line into tiers; short rows get empty strings for missing tiers.
    my @tiers = split /\t/, $line;
    my @shown = map { defined $_ ? $_ : '' } @tiers[ 0 .. $last_col ];

    # NOTE(review): tiers are joined with a single space, as in the source
    # as received; whitespace there looks collapsed, so the separator may
    # originally have been a tab - confirm.
    my $entry = join ' ', @shown;

    for my $word ( extract_words( $shown[$word_col] ) ) {
        push @{ $lines_for{$word} }, $entry;
    }
}

close $text_in;

# Write the results to NAME.con.
my $outfile = "$textfile.$concord";
open my $text_out, '>:encoding(UTF-8)', $outfile
    or die "Could not open the output file $outfile: $!";

for my $word ( sort keys %lines_for ) {
    print {$text_out} "$word";          # Word, directly followed by its lines
    for my $entry ( @{ $lines_for{$word} } ) {
        print {$text_out} " $entry\n";
    }
}

# Buffered write errors only surface at close, so check it.
close $text_out
    or die "Could not write the output file $outfile: $!";

# extract_words($text)
# Returns the list of words in $text: the text is lower-cased, the
# punctuation characters " ( ) , ; : . ! ? are replaced by spaces, and the
# result is split on whitespace.  Returns an empty list for empty text.
sub extract_words {
    my ($text) = @_;
    $text = lc $text;
    $text =~ s/["(),;:.!?]/ /g;
    return split ' ', $text;
}