I am new to coding and need help with my first big task, linking data from csv
to postgresql
using python. The code seems to function but when I look at it using postgresql
, for each row it appends the previous row. Focusing on the table teams
, considering I have 20 teams
I want to have 20 rows with each team
on a different row and with a different ID
. At this stage, considering I have 110 games
, after 10 games
the data just repeats itself in different rows. What's the solution? Thanks!
#!/usr/bin/python
# -*- coding: utf-8 -*-
import psycopg2
import sys
import csv
from itertools import count, cycle
from _tkinter import create
from setuptools.dist import sequence
from email.policy import default
path = r'C:\Users\sammy\Downloads\E0.csv'
with open(path, "r") as csvfile:
readCSV = csv.reader(csvfile, delimiter=",")
firstline = 1
con = None
con = psycopg2.connect("host='localhost' dbname='football' user='postgres' password='XXX'")
cur = con.cursor()
cur.execute("DROP TABLE games")
cur.execute("DROP TABLE teams")
cur.execute("DROP TABLE referees")
cur.execute("CREATE TABLE teams (HomeTeamID SERIAL PRIMARY KEY, AllTeams123 VARCHAR)")
cur.execute("CREATE TABLE referees (RefereeID SERIAL PRIMARY KEY, RefereeName VARCHAR)")
cur.execute("CREATE TABLE games (GAMEID SERIAL PRIMARY KEY, HomeTeamID INTEGER, FOREIGN KEY (HomeTeamID) REFERENCES teams(HomeTeamID), HomeTeam VARCHAR, AwayTeamID VARCHAR, AwayTeam VARCHAR, FTHG INTEGER, ATHG INTEGER, FTR VARCHAR, RefereeID INTEGER, FOREIGN KEY (RefereeID) REFERENCES referees(RefereeID), RefereeName VARCHAR, HY INTEGER, AY INTEGER)")
hometeams = []
awayteams = []
uniqueteams = []
uniquereferees = []
allreferees = []
allteams = hometeams + awayteams
gameuniqueteams = sorted(uniqueteams)
gameuniquereferees = sorted(uniquereferees)
try:
for row in readCSV:
if firstline:
firstline=0
continue
game1 = row[2]
game2 = row[3]
HomeTeam = row[2]
AwayTeamID = row[3]
AwayTeam = row[3]
FTHG = row[4]
ATHG = row[5]
FTR = row[6]
RefereeID = row[10]
RefereeName = row[10]
HY = row[19]
AY = row[20]
hometeams.append(HomeTeam)
awayteams.append(AwayTeam)
allteams = hometeams + awayteams
allreferees.append(RefereeName)
uniqueteams = []
uniquereferees = []
for x in allteams:
if x not in uniqueteams:
uniqueteams.append(x)
## hi
for x in allreferees:
if x not in uniquereferees:
uniquereferees.append(x)
gameuniqueteams = sorted(uniqueteams)
gameuniquereferees = sorted(uniquereferees)
data1 = (gameuniqueteams,)
data2 = (gameuniquereferees,)
data3 = (HomeTeam, AwayTeamID, AwayTeam, FTHG, ATHG, FTR, RefereeName, HY, AY)
query1 = "INSERT INTO teams (AllTeams123) VALUES (%s);"
query2 = "INSERT INTO Referees (RefereeName) VALUES (%s);"
query3 = "INSERT INTO games (HomeTeam, AwayTeamID, AwayTeam, FTHG, ATHG, FTR, RefereeName, HY, AY) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);"
cursor = con.cursor()
cursor.execute(query1, data1)
cursor.execute(query2, data2)
cursor.execute(query3, data3)
## hi
for x in allteams:
if x not in uniqueteams:
uniqueteams.append(x)
## hi
for x in allreferees:
if x not in uniquereferees:
uniquereferees.append(x)
except psycopg2.DatabaseError as e:
if con:
con.rollback()
print ("Error %s % e", e)
sys.exit(1)
finally:
if con:
con.commit()
con.close()
out=open("new_data.csv", "w")
output = csv.writer(out)
for row in data1:
output.writerow(row)
out.close()
gameuniqueteams = sorted(uniqueteams)
gameuniquereferees = sorted(uniquereferees)
print (hometeams)
print (awayteams)
print(uniqueteams)
print(gameuniqueteams)
print(uniquereferees)
print(gameuniquereferees)