#!/bin/bash

# $Id: nb-build-index.bash,v 1.3 2005/01/29 19:14:52 schwehr Exp $

# nanoblogger extension by Kurt Schwehr

# This script goes through the .txt files in the current directory.
# It takes a file listing phrases and, for each phrase, generates a list
# of links to the files that contain it.  In the future, the link numbers
# will hopefully match the blog entries, but for now they are just
# sequential within each phrase's list.
# It also does not currently handle phrases that wrap across lines or that
# have tags between their words.

# To get an initial list of words, you might do something like one of the
# following, but note that it will not catch multi-word phrases such as
# "San Diego":
#
# cat *.txt | tr '#&"=<>,.;:/\\' ' ' | tr ' \t' '\n\n' | egrep '[a-z]' | sort -u > words
# cat *.txt | tr '$%^*?!()&#"=<>,.;:/\\' ' ' | tr "'" ' ' | tr ' \t' '\n\n' | egrep '[a-z]' | sort -u > words

# Bug: the search also sees header text such as "AUTHOR: Kurt" in every
# file.  The first 6 lines of each file need to be stripped out first.

# Exit statuses used by this script.
declare -ri EXIT_FAILURE=1
declare -ri EXIT_SUCCESS=0

# Exactly one argument is required: the phrase file (one phrase per line).
if (( $# != 1 )); then
  # Diagnostics belong on stderr, not mixed into normal output.
  echo "ERROR: must specify phrase file" >&2
  exit "$EXIT_FAILURE"
fi

phrases=$1

# Fail early instead of building an index from a file that cannot be read.
if [[ ! -r "$phrases" ]]; then
  printf 'ERROR: cannot read phrase file: %s\n' "$phrases" >&2
  exit "$EXIT_FAILURE"
fi

# Running phrase counter; starts at zero.
declare -i num=0

# ---------------------------------------------------------------------------
# Index construction.
#
# For every phrase in the phrase file, find the *.txt files in the current
# directory that contain it, then write an HTML file holding one numbered
# link per matching file.
# ---------------------------------------------------------------------------

#######################################
# Record which *.txt files in the current directory contain each phrase.
# For phrase number N (zero-based, zero-padded to five digits) two files
# are written into the work directory:
#   tmp-N.phrase   the phrase itself
#   tmp-N.matches  names of the files containing it, one per line
# Blank lines in the phrase file are skipped.
# Arguments:
#   $1 - phrase file, one phrase per line
#   $2 - existing directory that receives the work files
# Outputs:
#   Running phrase counter on stdout (no trailing newline); a warning on
#   stderr when there are no *.txt files to search.
#######################################
find_phrase_matches() {
  local phrase_file=$1 work_dir=$2
  local phrase numstr candidate
  local -i num=0
  local -a txt_files=()

  # An unmatched glob stays literal, so keep only names that really exist.
  for candidate in *.txt; do
    if [[ -f "$candidate" ]]; then
      txt_files+=("$candidate")
    fi
  done
  if (( ${#txt_files[@]} == 0 )); then
    echo "WARNING: no .txt files in current directory" >&2
  fi

  # read -r keeps backslashes literal; the default IFS still trims leading
  # and trailing whitespace.  The extra test handles a final line that has
  # no trailing newline.
  while read -r phrase || [[ -n "$phrase" ]]; do
    # An empty pattern would match every file.
    [[ -n "$phrase" ]] || continue

    printf -v numstr '%05d' "$num"
    printf '%d ' "$num"
    printf '%s\n' "$phrase" > "$work_dir/tmp-$numstr.phrase"

    # FIX: turn files into one line each
    if (( ${#txt_files[@]} > 0 )); then
      # -F: the phrase is literal text, not a regular expression.
      # --: a phrase starting with '-' must not be parsed as an option.
      # grep exits 1 when nothing matches; that is not an error here.
      grep -l -F -- "$phrase" "${txt_files[@]}" \
        > "$work_dir/tmp-$numstr.matches"
    else
      # Never run grep without file operands: it would read stdin.
      : > "$work_dir/tmp-$numstr.matches"
    fi
    num=$((num + 1))
  done < "$phrase_file"
}

#######################################
# Turn the work files written by find_phrase_matches into the HTML index.
# Each phrase becomes a bold heading followed by one link per matching
# file, numbered from 1, and a closing <br>.  The link target is
# "<first 7 characters of the file name>.html#e<file name>".
# The phrase is written verbatim (no HTML escaping).
# Arguments:
#   $1 - work directory holding tmp-N.phrase / tmp-N.matches
#   $2 - output HTML file; any existing file is replaced
# Outputs:
#   Name of each phrase work file on stdout as it is processed.
# Returns:
#   1 if the output file cannot be created.
#######################################
assemble_phrase_index() {
  local work_dir=$1 out=$2
  local phrase_path base phrase filematch month
  local -i link_num

  rm -f -- "$out"
  : > "$out" || return 1

  for phrase_path in "$work_dir"/tmp-*.phrase; do
    # No phrases at all leaves the glob unexpanded; skip that literal.
    [[ -e "$phrase_path" ]] || continue
    printf '%s\n' "${phrase_path##*/}"

    base=${phrase_path%.phrase}
    phrase=$(< "$phrase_path")
    link_num=1
    {
      printf '<b>%s:</b>\n' "$phrase"
      while IFS= read -r filematch; do
        month=${filematch:0:7}
        printf '<a href="%s.html#e%s">%d</a>\n' \
          "$month" "$filematch" "$link_num"
        link_num=$((link_num + 1))
      done < "$base.matches"
      printf '<br>\n'
    } >> "$out"
  done
}

#######################################
# Build the phrase index for the *.txt files in the current directory.
# Work files live in a private mktemp directory, so leftovers from an
# interrupted run cannot leak into the index, and the directory is
# removed on every exit path.
# Arguments:
#   $1 - phrase file, one phrase per line
#   $2 - output file (optional, default: phrase-index.html)
# Returns:
#   0 on success, 1 if the phrase file is unreadable or a file or
#   directory cannot be created.
#######################################
build_phrase_index() {
  local phrase_file=$1
  local out=${2:-phrase-index.html}
  local work_dir
  local -i status=0

  if [[ ! -r "$phrase_file" ]]; then
    printf 'ERROR: cannot read phrase file: %s\n' "$phrase_file" >&2
    return 1
  fi

  work_dir=$(mktemp -d "${TMPDIR:-/tmp}/nb-build-index.XXXXXX") || {
    echo "ERROR: cannot create temporary directory" >&2
    return 1
  }
  # The trap runs after this function's locals are gone, so the path is
  # embedded (shell-quoted) in the trap text itself.
  trap "rm -rf -- $(printf '%q' "$work_dir")" EXIT

  echo "Search for each phrase across all txt files"
  find_phrase_matches "$phrase_file" "$work_dir"

  echo
  echo "Assembling index from matches"
  assemble_phrase_index "$work_dir" "$out" || status=1

  rm -rf -- "${work_dir:?}"
  trap - EXIT
  return "$status"
}

# Last command of the script: its status becomes the script's exit status.
build_phrase_index "$phrases"
# syntax highlighted by Code2HTML, v. 0.9.1