#! /bin/sh
## Hit the major search engines. Hose the [large] output to a file!
## autoconverts multiple arguments into the right format for given servers --
## usually worda+wordb, with certain lame exceptions like dejanews.
## Extracting and post-sorting the URLs is highly recommended...
##
## Altavista currently handled by a separate script; may merge at some point.
##
## _H* original 950824, updated 951218 and 960209

# Refuse to run without a search term (an empty first argument counts as
# "none").  The complaint goes to stderr: stdout is meant to be redirected to
# a results file, where the message would otherwise go unnoticed.
if test -z "${1}" ; then
  echo 'Needs argument[s] to search for!' >&2
  exit 1
fi
# _webq_plus WORD...
#   Print all WORDs as one line, with every run of whitespace (between the
#   words or inside one) replaced by a single "+".  The words are expanded
#   quoted and printed with printf, so glob characters ("*", "?") in a search
#   term are not expanded against the current directory and a term such as
#   "-n" is not eaten as an echo option.
_webq_plus () {
  printf '%s\n' "$*" | sed 's/[[:space:]][[:space:]]*/+/g'
}

# PLUSARG: the search words in the usual worda+wordb form.
PLUSARG="$(_webq_plus "$@")"
# PIPEARG: the same words joined by "|" instead.  It is derived from PLUSARG,
# so a literal "+" typed in a search term becomes "|" as well.
PIPEARG="$(printf '%s\n' "${PLUSARG}" | sed 's/+/|/g')"

# Scratch file for requests that need more than one line.  mktemp creates it
# under an unpredictable name; the template stays in /tmp so the path never
# contains whitespace, which keeps unquoted uses of $IFILE working.
IFILE="$(mktemp /tmp/.webq.XXXXXX)" || {
  echo 'Cannot create a scratch file in /tmp' >&2
  exit 1
}
# Remove the scratch file on every exit path; turning the usual signals into
# a plain exit makes the EXIT trap fire for them too.
trap 'rm -f "$IFILE"' EXIT
trap 'exit 1' HUP INT TERM

# Don't have "nc"? Get "netcat" from avian.org and add it to your toolkit.
#
# doquery PATH HOST PORT
#   Send the single request line "GET PATH" to HOST:PORT through nc and copy
#   whatever the server sends back to stdout.
#   nc is run with -v, -i 1 and -w 30 (per the netcat documentation: verbose
#   connection messages, a one-second interval between lines sent, and a
#   30-second timeout).
#   Returns nc's exit status, or 2 when not called with exactly three
#   arguments.
doquery () {
  if test "$#" -ne 3 ; then
    echo 'usage: doquery path host port' >&2
    return 2
  fi
  # printf rather than echo: some /bin/sh echo builtins expand backslash
  # escapes, which would corrupt a query containing a backslash.
  printf 'GET %s\n' "$1" | nc -v -i 1 -w 30 "$2" "$3"
}

# changed since original: every query now names its port explicitly, and each
# engine's results are introduced by a "=== Name ===" separator line.

# Yahoo goes first, so no blank line precedes its separator.
printf '%s\n' "=== Yahoo ==="
doquery "/bin/search?p=${PLUSARG}&n=300&w=w&s=a" search.yahoo.com 80

# Webcrawler: an empty line, the separator, then the query itself.
printf '\n%s\n' "=== Webcrawler ==="
doquery "/cgi-bin/WebQuery?searchText=${PLUSARG}&maxHits=300" webcrawler.com 80

# Infoseek is switched off: only the separator and a notice are printed.
# They want "registration" before they will do a real search, but the
# following query was tried anyway:
printf '\n%s\n%s\n' "=== Infoseek ===" " is broken."
# doquery "WW/IS/Titles?qt=${PLUSARG}" www2.infoseek.com 80
# ... which doesn't work, because their server wants the extra newlines after
# the request, WITH CRLF pairs.  Left alone for now.  If you want to play, the
# basic idea and query formats follow.
# echo "GET /WW/IS/Titles?qt=${PLUSARG}" > $IFILE
# echo "" >> $IFILE
# nc -v -w 30 guide-p.infoseek.com 80 < $IFILE

# Opentext: this one is kind of flaky; it might have to be run twice.
printf '\n%s\n' "=== Opentext ==="
doquery "/omw/simplesearch?SearchFor=${PLUSARG}&mode=phrase" search.opentext.com 80

# Inktomi apparently accepts only hits=100; anything else falls back to 30.
# The sed stage deletes the rating-dot image lines from the reply.
printf '\n%s\n' "=== Inktomi ==="
doquery "/query/?query=${PLUSARG}&hits=100" ink3.cs.berkeley.edu 1234 \
  | sed -e '/^<IMG ALT.*inktomi.*\.gif">$/d'

# Dejanews limits hits to 120 and has a nonstandard format: the search words
# are joined by "|" (PIPEARG) and the options are tacked on with "+".
printf '\n%s\n' "=== Dejanews ==="
doquery \
  "/cgi-bin/nph-dnquery?query=${PIPEARG}+maxhits=110+format=terse+defaultOp=AND" \
  smithers.dejanews.com 80

# OLD lycos query, kept for reference; it used to work until they broke it:
# doquery "/cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=1" \
# query5.lycos.cs.cmu.edu 80
# NEW lycos: returns nothing unless a User-agent field is present in the
# query, so the multi-line request is assembled in $IFILE and fed to nc
# directly instead of going through doquery.
# 960206: webmaster@lycos notified
# 960208: reply received; this is how it is handled now:
{
  echo "GET /cgi-bin/pursuit?query=${PLUSARG}&maxhits=300&terse=terse&matchmode=and&minscore=.5 HTTP/1.x"
  echo "User-agent: *FUCK OFF*"
  echo "Why: go ask todd@pointcom.com (Todd Whitney)"
  # the empty line ends the request header
  echo ''
} > "$IFILE"
printf '\n%s\n' "=== Lycos ==="
nc -v -i 1 -w 30 twelve.srv.lycos.com 80 < "$IFILE"

# All queries done: drop the scratch file and report success.
rm -f "$IFILE"
exit 0

# CURRENTLY BROKEN [?]
# infoseek

# some args need to be redone to ensure whatever "and" mode applies