1 #!/bin/bash
 2 
 3 # $Id: nb-build-index.bash,v 1.2 2005/01/02 06:20:57 schwehr Exp $
 4 
 5 # nanoblogger extension by Kurt Schwehr
 6 
 7 # This script goes through the .txt files in the current directory.
 8 # It takes the list of phrases and for each phrase generates a list of
 9 # links.  In the future, these numbers will hopefully match the blog
10 # entries, but for now, they are just sequential for each entry.
11 # It also does not currently handle wrapping or tags between words.
12 
13 # To get an initial list of words, you might do something like this,
14 # but know that it will not catch phrases like "San Diego"
15 #
16 # cat *.txt | tr '#"=<>,.;:/\\' ' ' | tr ' \t' '\n\n'  | egrep '[a-z]' | sort -u > words
17 
# Known issue: it sees my "AUTHOR: Kurt" in each entry.  Need to strip out the first 6 lines.
19 
# Phase 1: for every phrase, record the phrase and the entries that contain it.
#
# Input:   $1 - file with one phrase per line.  When no argument is given the
#          phrases are read from standard input (same as the old `cat` with
#          an empty argument list).
# Output:  tmp-N.phrase  - the N-th phrase (N counts from 0)
#          tmp-N.matches - names of the *.txt files in the current directory
#                          that contain the phrase, one per line
#          A running counter is printed to stdout as progress.
#
# Each phrase is handed to grep as a pattern, so regex metacharacters in a
# phrase keep their special meaning.
phrases=${1:-/dev/stdin}

declare -i num=0

# Leftovers from an earlier run with more phrases would otherwise be picked
# up when the index is assembled.
rm -f -- tmp-*.phrase tmp-*.matches

# `read -r` keeps backslashes intact; the `|| [[ -n ... ]]` part makes sure a
# last line without a trailing newline is still processed.
while read -r phrase || [[ -n $phrase ]]; do
    # A blank line would be an empty pattern that matches every entry.
    [[ -n $phrase ]] || continue

    printf '%s ' "$num"
    # Quoted so the phrase is stored verbatim (no globbing, no whitespace
    # collapsing).
    printf '%s\n' "$phrase" > "tmp-$num.phrase"
    # FIX: turn files into one line each
    # `--` keeps a phrase that starts with '-' from being read as an option.
    grep -l -- "$phrase" *.txt > "tmp-$num.matches"
    num=$((num + 1))
done < "$phrases"
31 
echo
echo "Assempting index from matches"


out=phrase-index.html

#######################################
# Phase 2: assemble the HTML index from the tmp-N.phrase / tmp-N.matches
# pairs found in the current directory.
#
# For every phrase one "<b>phrase:</b>" line is written, followed by one
# numbered link per matching file (numbers restart at 1 for each phrase) and
# a closing "<br>" line.
#
# Arguments: $1 - path of the HTML file to (re)create
# Outputs:   writes the index to $1; any previous file is removed first
#######################################
build_phrase_index() {
    local index_file=$1
    local -i idx=0 link=1
    local phrase entry month

    rm -f -- "$index_file"

    # Walk tmp-0, tmp-1, tmp-2, ... by number.  A `tmp-*.phrase` glob expands
    # in lexicographic order (tmp-10 before tmp-2), which scrambles the index
    # once there are more than ten phrases, and is processed literally when
    # there are no phrase files at all.
    while [[ -e tmp-$idx.phrase ]]; do
        phrase=$(< "tmp-$idx.phrase")
        printf '%s\n' "<b>$phrase:</b>" >> "$index_file"

        link=1
        if [[ -e tmp-$idx.matches ]]; then
            while IFS= read -r entry; do
                # The link target page is named after the first seven
                # characters of the matching file name (called "month" in the
                # original; presumably a YYYY-MM prefix -- confirm).
                month=${entry:0:7}
                printf '%s\n' \
                    "<a href=\"$month.html#e$entry\">$link</a>" >> "$index_file"
                link=$((link + 1))
            done < "tmp-$idx.matches"
        fi

        printf '%s\n' "<br>" >> "$index_file"
        idx=$((idx + 1))
    done
}

build_phrase_index "$out"


# syntax highlighted by Code2HTML, v. 0.9.1