Python 2.7 - Cannot update a table when comparing data between two cursors
I want to compare the rows of a table to find out whether they are equal or not. I created two cursors: 1. select the links from the table where visited = yes; 2. select the links from the table where visited = no. Using a for loop and an if statement, I want to compare the visited links with the not-visited links to see whether they are equal; if they are equal, update "visited" for that link to "yes". This is not done yet (my aim is to exit the program when all links are visited and marked yes, or when the "visited = no" cursor returns a null value). A portion of my code:
import sys import mysqldb import urllib import urlparse import re import htmlparser htmlparser import htmlparseerror bs4 import beautifulsoup mydb = mysqldb.connect(host='localhost', user='root', passwd='shailang', db='mydb') cursor = mydb.cursor() def process2(url): flag=0 cursor.execute("select links data_urls visited = 'ye'") yes_rows = cursor.fetchall() cursor.execute("select links data_urls visited = 'no'") no_rows = cursor.fetchall() no_links in no_rows: print 'noooooooooo' k= no_links print k yes_links in yes_rows: print "yessssssssssssss" k1 = yes_links print k1 if k1 == k : print 'equals' cursor.execute("update data_urls set visited = 'ye' links = %s",k) mydb.commit() def process(url): proxies = {"http":"http://proxy4.nehu.ac.in:3128", "https":"https://proxy4.nehu.ac.in:3128"} page = urllib.urlopen(url,proxies=none) text = page.read() page.close() soup = beautifulsoup(text) file=open('s.txt','w') cursor.execute("insert data_urls(links,parent,visited) values(%s,'null','ye')",url) tag in soup.findall('a', href=true): tag['href'] = urlparse.urljoin(url, tag['href']) print tag['href'] if re.match(ur'(?i)\b((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019]))',tag['href']): cursor.execute("insert data_urls(links,parent,visited) values(%s,%s,'no')", (tag['href'],url)) file.write('\n') file.write(tag['href']) #file.close() # cursor.execute("select * url") # rows = cursor.fetchall() mydb.commit() process2(1) def main(): if len(sys.argv) == 1: print 'no url !!' sys.exit(1) url in sys.argv[1:]: process(url) main()
I get no error, but nothing is updated in the database. Table desc:
+---------+---------------+------+-----+---------+-------+ | field | type | null | key | default | | +---------+---------------+------+-----+---------+-------+ | links | varchar(1000) | yes | | null | | | parent | varchar(1000) | yes | | null | | | visited | varchar(2) | yes | | null | | +---------+---------------+------+-----+---------+-------+
change mydb = mysqldb.connect(host='localhost', user='root', passwd='shailang', db='mydb') cursor = mydb.cursor()
def process2(url): flag=0 cursor.execute("select links data_urls visited = ye") yes_rows = cursor.fetchall() cursor.execute("select links data_urls visited = no") no_rows = cursor.fetchall() count = len(no_rows) in range(0, count): print 'noooooooooo' k= no_links print k j in range (i+1, count): print "yessssssssssssss" k1 = yes_links print k1 if k1 == k : print 'equals' cursor.execute("update data_urls set visited = 'ye' links = %s",k)
Comments
Post a Comment