nb-build-index.bash

 1 #!/bin/bash
 2 
 3 # $Id: nb-build-index.bash,v 1.3 2005/01/29 19:14:52 schwehr Exp $
 4 
 5 # nanoblogger extension by Kurt Schwehr
 6 
 7 # This script goes through the .txt files in the current directory.
 8 # It takes the list of phrases and for each phrase generates a list of
 9 # links.  In the future, these numbers will hopefully match the blog
10 # entries, but for now, they are just sequential for each entry.
11 # It also does not currently handle wrapping or tags between words.
12 
13 # To get an initial list of words, you might do something like this,
14 # but know that it will not catch phrases like "San Diego"
15 #
16 # cat *.txt | tr '#&"=<>,.;:/\\' ' ' | tr ' \t' '\n\n'  | egrep '[a-z]' | sort -u > words
17 # cat *.txt | tr '$%^*?!()&#"=<>,.;:/\\' ' ' | tr "'" ' ' | tr ' \t' '\n\n'  | egrep '[a-z]' | sort -u > words
18 
19 # Bug: Sees my "AUTHOR: Kurt" in each.  Need to strip out the first 6 lines
20 
21 declare -ri EXIT_FAILURE=1
22 declare -ri EXIT_SUCCESS=0
23 
24 if [ $# != 1 ]; then
25     echo "ERROR: must specify phrase file"
26     exit $EXIT_FAILURE
27 fi
28 
29 phrases=$1
30 
31 declare -i num=0
32 
33 echo "Search for each phrase across all txt files"
34 cat $phrases | while read phrase; do
35     numstr=`printf "%05d" $num`
36     #echo -n "$numstr "
37     echo -n "$num "
38     echo $phrase > tmp-$numstr.phrase
39     # FIX: turn files into one line each
40     grep -l "$phrase" *.txt > tmp-$numstr.matches
41     num=$[num+1]
42 done
43 
44 echo 
45 echo "Assembling index from matches"
46 
47 
48 out=phrase-index.html
49 rm -f $out
50 for file in tmp-*.phrase; do
51     echo $file
52     num=1
53     base=${file%%.phrase}
54     phrase=`cat $file`
55     #echo phrase is $phrase
56     echo "<b>$phrase:</b>" >> $out
57     cat $base.matches | while read filematch; do
58 	month=`echo $filematch | cut -c1-7`
59 	echo "<a href=\"$month.html#e$filematch\">$num</a>" >> $out
60 	num=$[num+1]
61     done
62     echo "<br>" >> $out
63 done
64 
65 rm -f tmp-*.{phrase,matches}
syntax highlighted by Code2HTML, v. 0.9.1