1 #!/bin/bash
2
3 # $Id: nb-build-index.bash,v 1.2 2005/01/02 06:20:57 schwehr Exp $
4
5 # nanoblogger extension by Kurt Schwehr
6
7 # This script goes through the .txt files in the current directory.
8 # It takes the list of phrases and for each phrase generates a list of
9 # links. In the future, these numbers will hopefully match the blog
10 # entries, but for now, they are just sequential for each entry.
11 # It also does not currently handle wrapping or tags between words.
12
13 # To get an initial list of words, you might do something like this,
14 # but know that it will not catch phrases like "San Diego"
15 #
16 # cat *.txt | tr '#"=<>,.;:/\\' ' ' | tr ' \t' '\n\n' | egrep '[a-z]' | sort -u > words
17
# This sees my "AUTHOR: Kurt" line in each file. Need to strip out the first 6 lines
19
# Usage: nb-build-index.bash PHRASES
#
#   PHRASES  file holding one phrase per line; "-" reads the list from
#            standard input.  Each phrase is handed to grep as a basic
#            regular expression.  Blank lines are ignored.
#
# Run from the directory holding the entry .txt files.  Output goes to
# phrase-index.html in that directory.  For phrase number N (counting from 0)
# two work files are left behind: tmp-N.phrase (the phrase itself) and
# tmp-N.matches (the names of the .txt files that contain it).

# Print a short usage message on stderr.
nb_index_usage() {
  printf 'Usage: %s PHRASES\n' "${0##*/}" >&2
  printf '  PHRASES  file with one phrase per line ("-" reads standard input)\n' >&2
}

# Stage 1: record each phrase and the .txt files that contain it.
# Arguments: $1 - phrase list file, or "-" for standard input
# Globals:   nb_phrase_count (written) - number of phrases processed
# Outputs:   tmp-N.phrase / tmp-N.matches files; progress numbers on stdout
# Returns:   1 when the current directory has no .txt files, else 0
nb_index_collect() {
  local list=$1 phrase
  local -i n=0
  local -a entries=( *.txt )

  nb_phrase_count=0

  # An unmatched glob stays literal, so test that the first word exists.
  if [[ ! -e "${entries[0]}" ]]; then
    printf '%s: no .txt files in the current directory\n' "${0##*/}" >&2
    return 1
  fi

  if [[ "$list" == "-" ]]; then
    list=/dev/stdin
  fi

  # The loop is fed by redirection rather than a pipe so that the counter is
  # still set once the loop ends.  "read -r" keeps backslashes intact (default
  # IFS still trims surrounding blanks), and the "|| [[ -n ... ]]" clause
  # keeps a last line that lacks a trailing newline.
  while read -r phrase || [[ -n "$phrase" ]]; do
    # An empty pattern would match every file, so skip blank lines.
    [[ -n "$phrase" ]] || continue

    printf '%s ' "$n"
    # Quoted, so the stored phrase is not glob-expanded or re-spaced.
    printf '%s\n' "$phrase" > "tmp-$n.phrase"
    # FIX: turn files into one line each
    # -e / -- keep phrases and file names that start with "-" from being
    # parsed as grep options.
    grep -l -e "$phrase" -- "${entries[@]}" > "tmp-$n.matches"
    n=$(( n + 1 ))
  done < "$list"

  nb_phrase_count=$n
}

# Stage 2: assemble the HTML index from the work files of stage 1.
# Arguments: $1 - number of phrases (work files 0 .. $1-1 are read)
#            $2 - output file; removed and rewritten
# Walking the numbers instead of globbing tmp-*.phrase keeps the phrases in
# list order (a glob sorts tmp-10 before tmp-2) and ignores stale work files
# left by an earlier, longer run.
nb_index_write() {
  local -i total=$1 i link
  local out=$2 phrase entry

  rm -f -- "$out"
  for (( i = 0; i < total; i++ )); do
    phrase=$(< "tmp-$i.phrase")
    printf '<b>%s:</b>\n' "$phrase"
    link=1
    while IFS= read -r entry; do
      # The first seven characters of the file name select the target page.
      printf '<a href="%s.html#e%s">%d</a>\n' "${entry:0:7}" "$entry" "$link"
      link=$(( link + 1 ))
    done < "tmp-$i.matches"
    printf '<br>\n'
  done > "$out"
}

# Entry point: validate the argument, then run both stages.
# Returns: 2 on a missing argument, 1 on an unreadable list or when there are
#          no .txt files, otherwise the status of the index-writing stage.
nb_index_main() {
  local phrases=${1-}
  local out=phrase-index.html

  if [[ -z "$phrases" ]]; then
    nb_index_usage
    return 2
  fi
  if [[ "$phrases" != "-" && ! -r "$phrases" ]]; then
    printf '%s: cannot read phrase list: %s\n' "${0##*/}" "$phrases" >&2
    return 1
  fi

  nb_index_collect "$phrases" || return

  echo
  echo "Assempting index from matches"

  nb_index_write "$nb_phrase_count" "$out"
}

nb_index_main "$@"
# syntax highlighted by Code2HTML, v. 0.9.1