You might want to have a look at the method PHP-Stats (demo) uses to detect bots.
They use a file called bw.dat where spiders are defined:
<?php
/*
 * bw.dat -- user-agent definition table (browsers, spiders, grabbers).
 *
 * $bw_dat_version : revision number of this definition file.
 * $bw_dat_time    : presumably the Unix timestamp of the last update of this
 *                   file -- TODO confirm against the code that reads it.
 * $bw_def         : list of definitions; each entry is a 4-element array:
 *
 *   [0] display name      - the name shown for this agent
 *   [1] string to look for - the signature identifying the agent
 *                           (NOTE(review): signatures contain no spaces, so
 *                           the consumer presumably strips whitespace from
 *                           the User-Agent before matching -- confirm)
 *   [2] version prefix    - the string that immediately precedes the version
 *                           number, or null when no version is extracted
 *   [3] macro-category    - grouping label: 'Spider', 'Grabber', or a browser
 *                           family name such as 'Explorer' or 'Mozilla'
 *
 * Entry order is preserved exactly; whether the consumer depends on it is
 * not visible from this file.
 */
$bw_dat_version=1;
$bw_dat_time=1105808245;
// Display name | string to look for | string preceding the version | macro-category
$bw_def=
Array(
Array('8484 Boston Project v 1.0','8484BostonProjectv1.0',null,'Spider'),
Array('A2BLocation','A2BLocation',null,'Spider'),
Array('Accoona-AI-Agent','Accoona-AI-Agent','Accoona-AI-Agent/','Spider'),
Array('Acoo Browser','AcooBrowser','MSIE','Explorer'),
Array('Alexa','ia_archiver',null,'Spider'),
Array('Alpha','Alpha',null,'Alpha'),
Array('Amazon','AMZNKAssocBot','AMZNKAssocBot/','Spider'),
Array('Amfibibot','Amfibibot','Amfibibot/','Spider'),
Array('AmigaVoyager','AmigaVoyager','AmigaVoyager/','AmigaVoyager'),
Array('Anonymization.Net','Anonymization.Net',null,'Spider'),
Array('AnoProxy','AnoProxy','AnoProxy','AnoProxy'),
Array('Antibot','antibot','antibot-V','Spider'),
Array('AOL NG','NG','NG/','Spider'),
Array('AOL','AOL',null,'AOL'),
Array('Appie','appie','appie','Spider'),
Array('Arachmo','Arachmo',null,'Spider'),
Array('Arachne','Arachne',null,'Arachne'),
Array('Arexx','Arexx',null,'Arexx'),
Array('archive.org_bot','archive.org_bot','archive.org_bot/','Spider'),
Array('Arianna','arianna.libero.itLinux','arianna.libero.itLinux/','Spider'),
Array('ASPseek','ASPseek','ASPseek/','Spider'),
Array('AskJeeves','AskJeeves',null,'Spider'),
Array('AskJeeves','AskJeeves','AskJeeves/','Spider'),
Array('Avant Browser','AvantBrowser','MSIE','Explorer'),
Array('Avant Browser','AvantBrowser',null,'Explorer'),
Array('AvantGo','AvantGo/',null,'AvantGo'),
Array('AWeb','AWeb',null,'AWeb'),
Array('Baiduspider','Baiduspider',null,'Spider'),
Array('Balihoo/Nutch','Balihoo/Nutch',null,'Spider'),
Array('BDFetch','BDFetch',null,'Spider'),
Array('BecomeBot','BecomeBot','BecomeBot/','Spider'),
Array('Beonex','Beonex','Beonex/','Mozilla'),
Array('BlackBerry 7100','BlackBerry7100/',null,'BlackBerry'),
Array('BlackBerry 7130','BlackBerry7130/',null,'BlackBerry'),
Array('BlackBerry 7200','BlackBerry7200/',null,'BlackBerry'),
Array('BlackBerry 7520','BlackBerry7520/',null,'BlackBerry'),
Array('BlackBerry 8700','BlackBerry8700/',null,'BlackBerry'),
Array('BlackBerry 8800','BlackBerry8800/',null,'BlackBerry'),
Array('Blogdimensionv','Blogdimensionv','Blogdimensionv','Spider'),
Array('BlogPulse','BlogPulse','BlogPulse(ISSpider-','Spider'),
Array('BlogsharesSpiders','BlogsharesSpiders','WolferizedV','Spider'),
Array('BlogSnowbot','blogsnowbot',null,'Spider'),
Array('Boitho-robot','boitho.com-dc','boitho.com-dc/','Spider'),
Array('Boitho-robot','boitho.com-robot','boitho.com-robot/','Spider'),
Array('Camino','Camino','Camino/','Mozilla'),
Array('Camino','Chimera','Chimera/','Mozilla'),
Array('CazoodleBot','CazoodleBot','CazoodleBot/','Spider'),
Array('Cerberian Drtrs','CerberianDrtrs','CerberianDrtrsVersion-','CerberianDrtrs'),
Array('CFC','parallelContextFocusCrawler','parallelContextFocusCrawler','Spider'),
Array('Charlotte','Charlotte','Charlotte/','Spider'),
Array('CLDSonyEricssonP800','CLDC-','CLDC-','CLDSonyEricssonP800'),
Array('Convera','ConveraCrawler','ConveraCrawler/','Spider'),
Array('Convera','ConveraInternetSpider','ConveraInternetSpiderV','Spider'),
Array('ConveraCrawler','ConveraCrawler',null,'Spider'),
Array('CorenSearchBot','CorenSearchBot','CorenSearchBot/','Spider'),
Array('Crazy Browser','CrazyBrowser','CrazyBrowser/','CrazyBrowser'),
Array('CSEHTMLValidator','CSEHTMLValidator(http://www.htmlvalidator.com/)',null,'Spider'),
Array('CUrl','curl','curl/','Grabber'),
Array('Dangerhiptop','Dangerhiptop/',null,'Spider'),
Array('D1GArabicEngine','D1GArabicEngine','D1GArabicEngine/','Spider'),
Array('DA','DA','DA','DA'),
Array('Daniel','Daniel',null,'Daniel'),
Array('DataCha0s','DataCha0s','DataCha0s/','Spider'),
Array('Delorie.comSES','Delorie.comSES',null,'Spider'),
Array('DiggerBot','DiggerBot',null,'Spider'),
Array('Dillo','Dillo','Dillo/','Dillo'),
Array('DNSRight','DNSRight',null,'Spider'),
Array('DoCoMo','DoCoMo','DoCoMo/','DoCoMo'),
Array('Doris','Doris','Doris/','Doris'),
Array('EasyDL','EasyDL','EasyDL/','Spider'),
Array('eDintorni','eDintorni',null,'Spider'),
Array('ELinks','ELinks','ELinks(','ELinks'),
Array('Emacs/W3','Emacs/W3','Emacs/W3/','Emacs/W3'),
Array('EnaBot','EnaBot','EnaBot/','Spider'),
Array('Enigma Browser','EnigmaBrowser',null,'EnigmaBrowser'),
Array('Epiphany','Epiphany','Epiphany/','Mozilla'),
Array('Epsilon','Epsilon','Epsilon/','Epsilon'),
Array('ETS','ETSv','ETSv','Spider'),
Array('Exabot','Exabot','Exabot/','Spider'),
Array('Exactseek-Crawler','exactseek-crawler','exactseek-crawler-','Spider'),
Array('Exactseek-Pagereaper','exactseek-pagereaper','exactseek-pagereaper-','Grabber'),
Array('Explorer','MSIE','MSIE','Explorer'),
Array('EZW','EZW','EZW/','EZW'),
Array('Falcon','falcon','falcon/','Spider'),
Array('FAST Enterprise Crawler','FASTEnterpriseCrawler','FASTEnterpriseCrawler/','Spider'),
Array('FAST-WebCrawler','FAST-WebCrawler','FAST-WebCrawler/','Spider'),
Array('Faxobot','Faxobot','Faxobot/','Spider'),
Array('Feedfetcher-Google','Feedfetcher-Google',null,'Spider'),
Array('FeedsterCrawler','FeedsterCrawler','FeedsterCrawler/','Spider'),
Array('FeedValidator','FeedValidator','FeedValidator/','Spider'),
Array('Findlinks','findlinks','findlinks/','Spider'),
Array('Firefox','Firebird','Firebird/','Mozilla'),
Array('Firefox','Firefox','Firefox/','Mozilla'),
Array('Firefox','Phoenix','Phoenix/','Mozilla'),
Array('FireTux','FireTux',null,'Spider'),
Array('FooBar','FooBar','FooBar/','FooBar'),
Array('FranklinLocator','FranklinLocator','FranklinLocator/','Spider'),
Array('Fscrawler','fscrawler','fscrawler/','Spider'),
Array('Galeon','Galeon','Galeon/','Galeon'),
Array('Gazz','gazz','gazz/','Spider'),
Array('getRAX','getRAXCrawler','getRAXCrawler','Spider'),
Array('Gigabot','Gigabot','Gigabot/','Spider'),
Array('Gigabot','GigabotSiteSearch','GigabotSiteSearch/','Spider'),
Array('Gigamega.bot','Gigamega.bot','Gigamega.bot/','Spider'),
Array('Goblin','Goblin','Goblin/','Spider'),
Array('Googlebot','Googlebot','Googlebot/','Spider'),
Array('Googlebot','Goolebot','Goolebot/','Spider'),
Array('Googlebot-AdSense','Mediapartners-Google','Mediapartners-Google/','Spider'),
Array('Googlebot-Image','Googlebot-Image','Googlebot-Image/','Spider'),
Array('Googlebot-Mobile','Googlebot-Mobile','Googlebot-Mobile/','Spider'),
Array('Google-Sitemaps','Google-Sitemaps','Google-Sitemaps/','Spider'),
Array('GoogleDesktop','GoogleDesktop','GoogleDesktop','Spider'),
Array('Grub-client','grub-client','grub-client-','Spider'),
Array('Grubcrawler','grubcrawler',null,'Spider'),
Array('HenryTheMiragoRobot','HenryTheMiragoRobot',null,'Spider'),
Array('Heritrix','heritrix','heritrix/','Spider'),
Array('Hogella','Hogella','Hogella/','Spider'),
Array('HP Web Print Smart','HPWebPrintSmart04b01.0.1.34',null,'Spider'),
Array('HTMLValidator_SEW','HTMLValidatorhttp://www.searchengineworld.com/',null,'Spider'),
Array('HTTPRetriever','HTTPRetriever','HTTPRetriever/','Spider'),
Array('IBM Almadem','almaden.ibm.com',null,'Spider'),
Array('IBrowse','IBrowse','IBrowse','IBrowse'),
Array('IBrowse','IBrowse/','IBrowse/','IBrowse'),
Array('Ichiro','ichiro/',null,'Spider'),
Array('IDBot','IDBot','IDBot/','Spider'),
Array('IEAutoDiscovery','IEAutoDiscovery',null,'Spider'),
Array('IlseBot','IlseBot/',null,'Spider'),
Array('IlTrovatore-Setaccio','IlTrovatore-Setaccio','IlTrovatore-Setaccio/','Spider'),
Array('IndyLibrary','IndyLibrary',null,'Spider'),
Array('InfoSeek Sidewinder','InfoSeekSidewinder','InfoSeekSidewinder/','Spider'),
Array('Inktomi slurp','slurp@inktomi.com',null,'Spider'),
Array('Innerprise','InnerpriseBot','InnerpriseBot/','Spider'),
Array('InternetSeer','InternetSeer.com',null,'Spider'),
Array('IRLbot','IRLbot','IRLbot/','Spider'),
Array('Irvine','Irvine','Irvine/','Grabber'),
Array('ISC Systemsi Rc Search 2.1','ISCSystemsiRcSearch2.1',null,'Spider'),
Array('ItaliaFacileBot','IFbot','IFbot/','Spider'),
Array('ItalianSpider','Spider-http://www.italianspider.com',null,'Spider'),
Array('J-PHONE','J-PHONE/','Y!J-SRD/','Spider'),
Array('JACK-O-LANTERN','JACK-O`-LANTERN','JACK-O`-LANTERN/','Spider'),
Array('Jakarta Commons - Http Client','JakartaCommons-HttpClient/','JakartaCommons-HttpClient/','JakartaCommons-HttpClient'),
Array('Java VM','Java','Java','Spider'),
Array('Java VM','Java/','Java/','Spider'),
Array('Jetbot','Jetbot','Jetbot/','Spider'),
Array('Jigsaw','Jigsaw','Jigsaw/','Spider'),
Array('Jyxobot','Jyxobot','Jyxobot/','Spider'),
Array('KMeleon','K-Meleon','K-Meleon/','Mozilla'),
Array('Konqueror','Konqueror','Konqueror/','Konqueror'),
Array('KummHttp','KummHttp','KummHttp/','Grabber'),
Array('Kylukacrawl','Kylukacrawl',null,'Spider'),
Array('LapozzBot','LapozzBot','LapozzBot/','Spider'),
Array('Larbin','larbin','larbin_','Spider'),
Array('libcurl-agent','libcurl-agent','libcurl-agent/','Spider'),
Array('Libwww-FM','libwww-FM','libwww-FM/','Spider'),
Array('Libwww-perl','libwww-perl','libwww-perl/','Spider'),
Array('Links','Links','Links(','Links'),
Array('LinkWalker','LinkWalker',null,'Spider'),
Array('LiteFinder','LiteFinder','LiteFinder/','Spider'),
Array('Livebot','Livebot',null,'Spider'),
Array('Lsearch','Lsearch','Lsearch/','Spider'),
Array('Lwp-trivial','lwp-trivial','lwp-trivial/','Spider'),
Array('Lycos_Spider','Lycos_Spider',null,'Spider'),
Array('Lynx','Lynx','Lynx/','Lynx'),
Array('Majestic12bot','MJ12bot','MJ12bot/v','Spider'),
Array('Mediapartners-Google','Mediapartners-Google','Mediapartners-Google/','Spider'),
Array('MetaTagRobot','MetaTagRobot','MetaTagRobot/','Spider'),
Array('MicrosoftD.A.I.P.P.P.D.','MicrosoftDataAccessInternetPublishingProviderProtocolDiscovery',null,'Spider'),
Array('MicrosoftURLControl','MicrosoftURLControl','MicrosoftURLControl-','Spider'),
Array('Missigua Locator 1.9','MissiguaLocator1.9',null,'Spider'),
Array('MJ12bot','MJ12bot','MJ12bot/','Spider'),
Array('MnoGoSearch','Mnogosearch','Mnogosearch-','Spider'),
Array('Mole/Intags','Mole2','Mole2/','Spider'),
Array('MOT','DRMIB','DRMIB/','Spider'),
Array('Mozdex','Mozdex','Mozdex/','Spider'),
Array('Mozilla','Gecko','rv:','Mozilla'),
Array('MSIECrawler','MSIECrawler',null,'Spider'),
Array('MSNBot','msnbot','msnbot/','Spider'),
Array('MSNBot-Media','msnbot-media','msnbot-media/','Spider'),
Array('MSNBot-News','msnbot-news','msnbot-news/','Spider'),
Array('MRSBOT','MSRBOT',null,'Spider'),
Array('MSIE999.1','MSIE999.1',null,'Spider'),
Array('MunaxBot','MunaxBot',null,'Spider'),
Array('NASA Search','NASASearch','NASASearch/','Spider'),
Array('NationalDirectory-WebSpider','NationalDirectory-WebSpider','NationalDirectory-WebSpider/','Spider'),
Array('NaverBot','NaverBot','NaverBot-','Spider'),
Array('NaverBot','NaverBot','NaverBot/','Spider'),
// The Netcraft survey client is a crawler; NetPositive is a browser and, like
// the other browsers in this table, uses its own name as macro-category.
Array('NetcraftWebServer','NetcraftWebServerSurvey',null,'Spider'),
Array('NetPositive','NetPositive','NetPositive/','NetPositive'),
Array('NetResearchServer','NetResearchServer','NetResearchServer/','Spider'),
Array('Netscape 0.91 Beta','Mozilla/0.91Beta(Windows)',null,'Netscape'),
Array('Netscape','Mozilla','Mozilla/','Netscape'),
Array('Netscape','Netscape','Netscape/','Mozilla'),
Array('Netscape','Netscape','Netscape6/','Mozilla'),
Array('Netvibes','Netvibes','Netvibes','Spider'),
Array('NimbleCrawler','NimbleCrawler','NimbleCrawler/','Spider'),
Array('Noago Spider','NoagoSpider',null,'Spider'),
Array('Nokia 6600','Nokia6600','Nokia6600/','Nokia'),
Array('Nokia 6280','Nokia6280','Nokia6280/','Nokia'),
Array('Nokia 6610','Nokia6610','Nokia6610/','Nokia'),
Array('Nokia 6820','Nokia6820','Nokia6820/','Nokia'),
Array('Nokia 7110','Nokia7110','Nokia7110/','Nokia'),
Array('Nokia-WAPToolkit','Nokia-WAPToolkit','Nokia-WAPToolkit/','Nokia'),
Array('NPBot','NPBot',null,'Spider'),
Array('NRSbot','nrsbot','nrsbot/','Spider'),
Array('Nustcrape','Nustcrape','Nustcrape/','Spider'),
Array('Nutch','Nutch',null,'Spider'),
Array('NutchCVS','NutchCVS','NutchCVS/','Spider'),
Array('NutchOrg','NutchOrg','NutchOrg/','Spider'),
Array('Nutscrape','Nutscrape','Nutscrape/','Spider'),
Array('OceanConserve','OceanConserve',null,'Spider'),
Array('OmniExplorerBot','OmniExplorer_Bot','OmniExplorer_Bot/','Spider'),
Array('OmniWeb','OmniWeb','OmniWeb/','OmniWeb'),
Array('online link validator','onlinelinkvalidator',null,'Spider'),
Array('Openbot','Openbot','Openbot/','Spider'),
Array('OpenISearch','OpenISearch','OpenISearch/','Spider'),
Array('Opera','Opera','Opera','Opera'),
Array('Opera','Opera','Opera/','Opera'),
Array('Opera Mini','OperaMini','OperaMini/','Opera'),
Array('Oregano','Oregano','Oregano/','Oregano'),
Array('Pagebull','Pagebullhttp://www.pagebull.com/',null,'Spider'),
Array('PagmIEDownload','PagmIEDownload',null,'Grabber'),
Array('ParaSite','ParaSite','ParaSite/','Spider'),
Array('Pavuk','pavuk','pavuk/','Grabber'),
Array('PBrowse','PBrowse','PBrowse','PBrowse'),
Array('PEERbot','PEERbot',null,'Spider'),
Array('PeopleChat','PeopleChat',null,'Spider'),
Array('PetitNavire','PetitNavire',null,'Spider'),
Array('PGWeb','PGWeb',null,'Spider'),
Array('PHP','PHP','PHP/','Spider'),
Array('PicoSearch','PicoSearch','PicoSearch/','Spider'),
Array('PicSearchBot','psbot','psbot/','Spider'),
Array('Ping.blo.gs','ping.blo.gs','ping.blo.gs/','Spider'),
Array('PingALink','PingALink','PingALinkMonitoringServices','Spider'),
Array('pipeLiner','pipeLiner','pipeLiner/','Spider'),
Array('Pita','Pita',null,'Spider'),
Array('Pizilla','Pizilla',null,'Spider'),
Array('PlagiarBot','PlagiarBot','PlagiarBot/','Spider'),
Array('PlanetWeb','Planetweb','Planetweb/','PlanetWeb'),
Array('PlantyNet','PlantyNet','PlantyNet_WebRobot_V','Spider'),
Array('MS Pocket IE','MicrosoftPocketInternetExplorer','MicrosoftPocketInternetExplorer/','Explorer'),
Array('MS Pocket IE','MSPIE','MSPIE','Explorer'),
Array('Pockey','Pockey',null,'Spider'),
Array('POE-Component-Client-HTTP','POE-Component-Client-HTTP','POE-Component-Client-HTTP/','POE-Component-Client-HTTP'),
Array('Polybot','polybot','polybot','Spider'),
Array('Pompos','Pompos','Pompos/','Spider'),
Array('Popdexter','Popdexter','Popdexter/','Spider'),
Array('PortalB/Alacra','PortalBSpider','PortalBSpider/','Spider'),
Array('Portaljuice','PJspider','PJspider/','Spider'),
Array('PortHuronLabs','PortHuronLabs',null,'Spider'),
Array('Post.sk','pd02_','pd02_','Spider'),
Array('Potbot','potbot','potbot','Spider'),
Array('PythonUrlLib','Python-urllib','Python-urllib/','Spider'),
Array('ReqwirelessWeb','ReqwirelessWeb','ReqwirelessWeb/','ReqwirelessWeb'),
Array('Robozilla','Robozilla','Robozilla/','Spider'),
Array('RPT-HTTPClient','RPT-HTTPClient','Mozilla/4.5RPT-HTTPClient','Spider'),
Array('RSSOneEngine','RSSOneEngine','RSSOneEngine/','Spider'),
Array('Safari','Safari','Safari/','Safari'),
Array('Samsung-SGH-P910','SAMSUNG-SGH-P910','SAMSUNG-SGH-P910/','SAMSUNG-SGH-P910'),
Array('SBL-BOT','SBL-BOT',null,'Spider'),
Array('Schmozilla','Schmozilla','Schmozilla/v','Spider'),
Array('Scooter','Scooter','Scooter/','Spider'),
Array('Scrubby','Scrubby','Scrubby/','Spider'),
Array('Search.ch','search.ch','search.chV','Spider'),
Array('Seekbot','http://www.seekbot.net','Seekbot/','Spider'),
Array('Shelobv','shelobv',null,'Spider'),
Array('Sherlock','sherlock',null,'Spider'),
Array('Shim-Crawler','Shim-Crawler',null,'Spider'),
Array('SleekSpider','SleekSpider',null,'Spider'),
Array('SleipnirVersion','SleipnirVersion','SleipnirVersion','SleipnirVersion'),
Array('Slurp/cat','Slurp/cat',null,'Spider'),
Array('SlySearch','SlySearch','SlySearch/','Spider'),
Array('SmallProxy 3.2 Beta 19','SmallProxy3.2Beta19',null,'Spider'),
Array('Sna','mikeelliott@hotmail.com','sna-','Spider'),
// No version prefix for this entry: use the null constant, not the string 'null'.
Array('SnapPreviewBot','Mozilla/5.0(SnapPreviewBot)Gecko/20061206Firefox/1.5.0.9',null,'Spider'),
Array('SnapShots','Snapbot','Snapbot/','Spider'),
Array('Snoopy','Snoopy','Snoopyv','Snoopy'),
Array('Sohu-search','sohu-search',null,'Spider'),
Array('SpeedySpider','SpeedySpider',null,'Spider'),
Array('SpeedySpider','SpeedySpider(http://www.entireweb.com/about/search_tech/speedy_spider/)',null,'Spider'),
Array('SpiderMonkey','SpiderMonkey','SpiderMonkey/','Spider'),
Array('Sproose','sproose','sproose/','Spider'),
Array('Sqworm','Sqworm','Sqworm/','Spider'),
Array('Steeler','Steeler/',null,'Spider'),
Array('SurfControl','SurfControl',null,'Spider'),
Array('SurveyBot','SurveyBot','SurveyBot/','Spider'),
Array('Szukacz','Szukacz','Szukacz/','Spider'),
Array('Tamu','TAMU_CS_IRL_CRAWLER','TAMU_CS_IRL_CRAWLER/','Spider'),
Array('TeamSoft WinInet Component','TeamSoftWinInetComponent',null,'TeamSoft WinInet Component'),
Array('Technoratibot','Technoratibot','Technoratibot/','Spider'),
Array('Teemer','Teemer',null,'Spider'),
Array('Teleport','Teleport','TeleportPro/','Grabber'),
Array('Teoma','Teoma',null,'Spider'),
Array('This_is_Browser','This_is_Browser','This_is_Browser/','Spider'),
Array('TinEye','TinEye','TinEye/','Spider'),
Array('TivraSpider','tivraSpider','tivraSpider/','Spider'),
Array('topicblogs','topicblogs','topicblogs/','Spider'),
Array('TurnitinBot','TurnitinBot','TurnitinBot/','Spider'),
Array('Twiceler','Twiceler','Twiceler','Spider'),
Array('TypeCore','TypeCore/',null,'Spider'),
Array('UbiCrawler','UbiCrawler','UbiCrawler/','Spider'),
Array('Ultraseek','Ultraseek',null,'Spider'),
Array('UndertheRainbow','UndertheRainbow','UndertheRainbow','Spider'),
Array('Updated','http://www.updated.com','updated/','Spider'),
Array('Vagabondo','Vagabondo',null,'Spider'),
Array('Versus','versus','versus','Spider'),
Array('VoilaBot','VoilaBot','VoilaBot/','Spider'),
Array('Vortex','Vortex','Vortex','Spider'),
Array('vspider','vspider',null,'Spider'),
Array('W3C_Validator','W3C_Validator','W3C_Validator/','Spider'),
Array('w3m','w3m','w3m/','w3m'),
Array('WaypathDevelopmentCrawler','Waypathdevelopmentcrawler',null,'Spider'),
Array('WDG_Validator','WDG_Validator','WDG_Validator/','Spider'),
Array('WebAlta Crawler','WebAltaCrawler','WebAltaCrawler/','Spider'),
Array('WebCapture','WebCapture',null,'Spider'),
Array('Webcollage','webcollage','webcollage/','Spider'),
Array('WebCopier','Webcopier','Webcopierv','Grabber'),
Array('WebCopier','WebCopier','WebCopierv','Grabber'),
Array('WebCPO','WebCPO',null,'Spider'),
Array('WebDAV','Microsoft-WebDAV-MiniRedir','Microsoft-WebDAV-MiniRedir/','Spider'),
Array('WebDownloader','WebDownloader','WebDownloader/','Grabber'),
Array('WebPix','WebPix','WebPix','Grabber'),
Array('WebReaper','WebReaper',null,'Grabber'),
Array('WEPSearch00','WEPSearch00',null,'Spider'),
Array('Websquash','websquash.com',null,'Spider'),
Array('WebStripper','WebStripper','WebStripper/','Grabber'),
Array('WebTV','WebTV','WebTV/','WebTV'),
Array('WebZIP','WebZIP','WebZIP/','Grabber'),
Array('Wells Search II','WellsSearchII',null,'Spider'),
Array('Wget','Wget','Wget/','Grabber'),
Array('WinHTTP Robot','WinHTTPRobot','WinHTTPRobot/','Spider'),
Array('WorQmada','WorQmada','WorQmada/','Spider'),
Array('Wotbox','Wotbox','Wotbox/','Spider'),
Array('WWW-Mechanize','WWW-Mechanize','WWW-Mechanize/','Spider'),
Array('XenuLinkSleuth','XenuLinkSleuth','XenuLinkSleuth','Spider'),
Array('Xerka WebBot','XerkaWebBot','XerkaWebBot/','Grabber'),
Array('xxx','xxx',null,'Spider'),
Array('Yahoo-Blogs','Yahoo-Blogs','Yahoo-Blogs/','Spider'),
Array('Yahoo!Mindset','Yahoo!Mindset',null,'Spider'),
Array('Yahoo!Slurp','Yahoo!Slurp',null,'Spider'),
Array('Yahoo!SlurpChina','Yahoo!SlurpChina',null,'Spider'),
Array('Yahoo-MMCrawler','Yahoo-MMCrawler',null,'Spider'),
Array('Yahoo Seeker','YahooSeeker','YahooSeeker/','Spider'),
Array('Yandex','Yandex',null,'Spider'),
Array('Yeti','Yeti','Yeti/','Spider'),
Array('ZeusWebsterPro','WebsterPro','WebsterProV','Spider'),
Array('ZipppBot','ZipppBot','ZipppBot/','Spider'),
Array('ZyBorg','ZyBorg','ZyBorg/','Spider')
);
?>
Other sources for spiders and bots:
http://www.botsvsbrowsers.com/
http://www.user-agents.org/index.shtml