# /robots.txt file for http://www.pubinnguide.com
#
# Each record below is a group: one or more "User-agent:" lines followed by
# the "Disallow:" rules that apply to those crawlers. A crawler obeys only the
# most specific group that matches its name; "*" is the fallback for crawlers
# that have no group of their own.
#
# Examples...
#
# This allows a crawler to access everywhere:
#   User-agent: mr-webcrawler
#   Disallow:
#
# This allows a crawler no access, as all URLs on a server start with '/'
# which is all of them:
#   User-agent: mr-webcrawler
#   Disallow: /
#
# This stops all robots visiting URLs with /images or /hub
# ('*' means any other user agent):
#   User-agent: *
#   Disallow: /images/
#   Disallow: /hub/
#
# Other info from Wikipedia - http://en.wikipedia.org/wiki/Robots.txt
#   Crawl-delay: 30        - in seconds to wait between successive requests to the same server
#   Request-rate: 1/5      - maximum rate is one page every 5 seconds
#   Visit-time: 0600-0845  - only visit between 6:00 AM and 8:45 AM UT (GMT)


# A L L O W E D

# Allow Google Bot
# NOTE(review): because Googlebot has its own group it does NOT inherit the
# "User-agent: *" rules below, so the admin/login pages are open to it.
# Confirm that is intended. Google also documents that it does not support
# Crawl-delay; the line is kept for crawlers that reuse the Googlebot token.
User-agent: Googlebot
Disallow:
Crawl-delay: 30


# D I S A L L O W E D

# Disallow all spidering of images, admin pages and the '/flash' folder.
# (The '/flash' rule used to live in a second "User-agent: *" group at the end
# of the file; it is merged here so parsers that honour only the first
# matching group still apply it.)
User-agent: *
Disallow: /images/
Disallow: /admin/
Disallow: /uploads/
Disallow: /adminpanel.asp
Disallow: /login.asp
Disallow: /logout.asp
Disallow: /loggedoff.asp
Disallow: /editproject.asp
Disallow: /allprojects.asp
Disallow: /upload.asp
Disallow: /asplook.asp
Disallow: /downloads.asp
Disallow: /flash

# Disallow Google Image Bot
User-agent: Googlebot-Image
Disallow: /

# Disallow Twiceler Bot - Cuill
User-agent: twiceler
Disallow: /

# Disallow Yahoo Image Bot
User-agent: Yahoo-MMCrawler
Disallow: /

# Disallow CazoodleBot - from University of Illinois
User-agent: CazoodleBot
Disallow: /

# Disallow Baidu Bot (Japanese)
User-agent: Baiduspider
Disallow: /

# Disallow NimbleCrawler (http://www.webmasterworld.com/forum93/858.htm)
User-agent: nimblecrawler
Disallow: /

# Disallow TurnITin - "This robot collects content from the Internet for the
# sole purpose of helping educational institutions prevent plagiarism"
User-agent: TurnitinBot
Disallow: /

# Disallow Seekbot - http://www.seekport.co.uk/seekbot/
User-agent: Seekbot
Disallow: /

# Disallow Sogou - Chinese Search Engine
User-agent: Sogou
Disallow: /

# Disallow Mirago.com (http://www.miragorobot.com)
# The URL is kept in this comment only: a User-agent value must be the bare
# product token, otherwise it will not match the crawler's name.
# NOTE(review): verify the token against the crawler's published user agent.
User-agent: Mirago-Test-Robot
Disallow: /

# Disallow MSN from seeing gifs and jpgs
# ('*' and '$' are wildcard extensions; '$' anchors the match at the URL end.)
User-agent: msnbot
Disallow: /*.gif$
Disallow: /*.jpg$
Disallow: /*.jpeg$