DATA ANALYTICS REFERENCE DOCUMENT |
|
---|---|
Document Title: | 52553 - Applied Database |
Document No.: | 1548347488 |
Author(s): | Rita Raher, Gerhard van der Linde |
Contributor(s): |
REVISION HISTORY
Revision | Details of Modification(s) | Reason for modification | Date | By |
---|---|---|---|---|
0 | Draft release | 52553 - Applied Database reference page | 2019/01/24 16:31 | Gerhard van der Linde |
I'll detail the breakdown of the 40% continuous assessment over the coming weeks in the module.
CRUD
mysql> create database myFirstDatabase; Query OK, 1 row affected (0.01 sec)
mysql> CREATE dataBase MYFirstDATABASE; Query OK, 1 row affected (0.01 sec)
mysql> create -> database -> MyFirstDatabase -> ; Query OK, 1 row affected (0.01 sec)
mysql> show databases; +--------------------+ | Database | +--------------------+ | information_schema | | myfirstdatabase | | mysql | | performance_schema | | sys | +--------------------+ 5 rows in set (0.00 sec)
mysql> use myfirstdatabase; Database changed
mysql> create table car ( -> make VARCHAR(20), -> model VARCHAR(20), -> registration VARCHAR(15), -> colour VARCHAR(10), -> mileage INTEGER, -> enginSize FLOAT(2,1)); Query OK, 0 rows affected (0.09 sec)
mysql> describe car; +--------------+-------------+------+-----+---------+-------+ | Field | Type | Null | Key | Default | Extra | +--------------+-------------+------+-----+---------+-------+ | make | varchar(20) | YES | | NULL | | | model | varchar(20) | YES | | NULL | | | registration | varchar(15) | YES | | NULL | | | colour | varchar(10) | YES | | NULL | | | mileage | int(11) | YES | | NULL | | | enginSize | float(2,1) | YES | | NULL | | +--------------+-------------+------+-----+---------+-------+ 6 rows in set (0.02 sec)
Primary key - unique values. 1 primary key pair table add personID - int auto_increment flag to increase
Using reg as primary key
reg VARCHAR(15) PRIMARY KEY(reg)
SELECT personID, NAME, AGE FROM person WHERE age >= 20 AND age <= 39; SELECT personID, NAME, AGE FROM person WHERE age between 20 and 39;
SELECT personID, NAME, AGE FROM person WHERE name LIKE “%a%”; SELECT personID, NAME, AGE FROM person WHERE name LIKE “_a%”;
SELECT personID, NAME, AGE FROM person WHERE age IN (12, 13, 14, 15); SELECT NAME, AGE FROM person WHERE sex =“M” AND name LIKE “S%” OR name LIKE “A%"
SELECT NAME, AGE FROM person WHERE sex =“M” AND name LIKE “S%” OR name LIKE “A%" LIMIT 1; SELECT NAME, AGE FROM person WHERE sex =“M” AND name LIKE “S%” OR name LIKE “A%" LIMIT 0,3;
YEAR() DAY()
SELECT name, age, MONTHNAME(dob) FROM person WHERE day(DOB) between 1 and 11 AND name NOT like “A%" order by name DESC
REPLACE(name, “Ms”, “MRS”)
SUBSTR(name, 1, 3)**
ROUND(enginesize)
DATE_FORMAT(dob, "%d-%m-%y”)
An aggregate function performs a calculation on a set of values and returns a single value.
Group By The groupie statement is often used with aggregate functions(COUNT, MAX, MIN, SUM, AVG) to group the result-set by one or more columns
SELECT level, AVG(experience) FROM teacher GROUP BY level; SELECT ROUND(AVG(mileage)) as “KMs” FROM car; SELECT model, ROUND(AVG(mileage)) as “KMs” FROM car GROUP BY model; SELECT model, ROUND(AVG(mileage)) as “KMs” FROM car WHERE mileage > 60000 GROUP BY model;
SELECT model, ROUND(AVG(mileage)) as “KMs” FROM car WHERE mileage > 60000 GROUP BY model HAVING KMs > 250000;
SELECT IF(150>200, “yes”, “no”)”T/F”;
SELECT *,IF(experience >= 20 AND experience <= 45, “Y”, “”) as “Payrise Due” FROM teacher
CASE WHEN condition 1 THEN result 1 WHEN condition 2 THEN result 1 WHEN condition n THEN result n ELSE result END
SELECT name, dob, CASE WHEN month(dob) in (2,3,4) THEN “Spring” WHEN month(dob) in (5,6,7) THEN “Summer” WHEN month(dob) in (8,9,10) THEN “Autumn” WHEN month(dob) in (11,12,1) THEN “Winter” END as Season from person;
SELECT name, dob, CASE WHEN month(dob) in (2,3,4) THEN “Spring” WHEN month(dob) in (5,6,7) THEN “Summer” ELSE “" END as Season from person;
A stored routine is user-written code that extends the functionality Mysql.
Uses
Advantages
Disadvantages
https://dev.mysql.com/doc/refman/8.0/en/create-procedure.html
CREATE FUNCTION add2Nums(num1 integer, num2 integer) RETURNS integer DETERMINISTIC BEGINS RETURN num1 + num2; END SELECT add2Nums(3, 10);
CREATE FUNCTION discount(age INT(11)) RETURNS VARCHAR(3) DETERMINISTIC BEGIN IF age < 16 THEN RETURN “0%”; ELSEIF age < 26 THEN RETURN “10%”; ELSEIF age < 40 THEN RETURN “20%”; ELSEIF age < 60 THEN RETURN “30%”; ELSE RETURN “40%”; END IF; END
SELECT name, age, discount(age) “Discount” FROM person
FUNCTIONS | PROCEDURES |
---|---|
Return a single value | Retun 0 or more values |
Only select | select, insert, update, delete |
Cant use stored procedures | can use stored functions |
Does not support transactions | Support transactions |
https://dev.mysql.com/doc/refman/8.0/en/create-procedure.html
Create procedure CREATE PROCEDURE make_mileage(mk VARCHAR(20), ml INT(11)) DETERMINISTIC BEGIN SELECT * FROM CAR WHERE make LIKE mk AND mileage < ml ORDER BY mileage; END
Call Procedure
call make_mileage(“Toyota”, 200000) call make_mileage(“Ford”, 5000)
Finding Functions and Procedures
SELECT name, type from MYSQL.PROCE limit 3;
What’s in a function Procedure
SHOW CREATE FUNCTION add2nums;
Drop a Function or Procedure
DROP FUNCTION add2nums
Normalization is the process of organizing the columns (attributes) and tables (relations) of a relational database to minimize data redundancy.
show create table manufacturer; +--------------+-------------------------------------+ | Table | Create Table | +--------------+-------------------------------------+ | manufacturer | CREATE TABLE `manufacturer` ( `manu_code` varchar(3) NOT NULL, `manu_name` varchar(200) NOT NULL, `manu_details` varchar(400) DEFAULT NULL, PRIMARY KEY (`manu_code`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 | +--------------+-------------------------------------+ 1 row in set (0.00 sec)
INNER JOIN | LEFT JOIN |
---|---|
Return rows from two tables only when the JOIN condition is met. | Retrun rows from two tables when the JOIN condition is met. |
If JOIN condition is not met, nothing is returned from either table. | If JOIN condition is not met, rows from the first (LEFT) table are returned and NULL is returned instead of rows from the second table. |
INSERT INTO person VALUES(1, "John", 23, "M", 1);
INSERT INTO person (age, sex, isStudent) VALUES (20, "F", 0);
https://dev.mysql.com/doc/refman/8.0/en/working-with-null.html
https://dev.mysql.com/doc/refman/8.0/en/update.html
UPDATE Person SET age = 23 WHERE personID = 3; UPDATE person set name = CONCAT(IF(sex="M", "MR.", "Ms.",name));
https://dev.mysql.com/doc/refman/8.0/en/delete.html
* DELETE FROM <table>;
* DELETE FROM <table>
WHERE condition;
DELETE from PERSON WHERE personID = 6; DELETE FROM person WHERE sex = "M" AND isStudent AND age > 20;
Foreign keys can be used to define table behavior when data is deleted. The default behavior for MySQL is ON DELETE RESTRICT, even if not specified.
MySQL Command | MySQL Behaviour |
---|---|
RESTRICT | Prevents entried from being deleted where a foreign key exists |
CASCADE | Proceed to delete entries from the table worked on as well as deleting the referenced entries |
SET NULL | Delete the entries after setting the references to NULL |
SELECT emp_no, first_name, last_name FROM employees WHERE emp_no IN( SELECT emp_no FROM salaries WHERE salary = ( SELECT MAX(salary) FROM salaries ) );
{ "id" : 1 }
{ "id" : 3.14 }
Note that there is no distinction between integer and floating point numbers.
{ "id" : 1, "fname" : "John" }
{ "reg" : "09-G-13" "hybrid" : false }
{ "student" : "G00257854" "subjects" : ["Databases", "Java", "Mobile Apps"] }
{ "student" : "G00257854" "address" : { "street" : "Castle Street" "town" : "Athenry" "county" : "Galway" } }
Document - slide 12….
A document is record in a MongoDB collection and the basic unit of data in MongoDB. Documents are analogous to JSON objects or records in an RDBMS.
Collection
Database A number of databases can be run on a single MongoDB server.
MongoDB Commands
Does not update Mary
db.user.find()
To find only documents that have an email attribute and age is greater than 20
db.user.find({$and:[{email: {$exists:true}}, {age:{$gt:20}}]})
db.User.find()
Return only the email attribute of documents where age is greater than 18
db.User.find({age: {$gt: 20}}, {email:1})
Return only the first_name and surname attributes of all documents
db.User.find({}, {_id:false, first_name:1, surname:1})
Example
Get the average gpa for all students
db.users.aggregate([{$group:{_id:null, Average{$avg:"$gpa"}}}])
$group same as Group by in MYSQL
Result:
Get the Maximum GPA per age group
db.march8.aggregate([{$group:{_id:"$age", "Max GPA per Age":{$max:"$gpa"}}}])
for each document d in 'user'{ if(d.age == 35){ return d; } }
db.collection.getIndexes()
Which returns information in the following format, detailing the index field (_id) and the order of the indexes(1 is ascending:-1 is descending):
"key":{ "_id":1 }
db.user.createIndex({age:1})
db.collection.dropIndex()
db.collection.dropIndex({age:1})
db.student.save({_id:"G00789445", name: "John", address:{_id: 100, town: "Athenry", county:"Galway"}})
db.student.find({}, {address:1})
{ "_id" : "G00789445", "address" : { "_id" : 100, "town" : "Athenry", "county" : "Galway" } }
db.student.find({address:{$exists: true}}, {_id:0, "address.county":1})
Note: Observe details in projection part of find, i.e. find(query, projection), {_id:0, “address.county”:1}
{_id:0, “address.county”:1} | Meaning |
---|---|
_id:0 | Do NOT output _id field |
“address.county”:1 | Only output county field |
Create the document with the relationships
db.student.save({_id:"G00101224", name:"Mary", modules:[{_id:"M100", module:"Databases"}, _id:"M101", module:"Java"}]})
Show the student's _id and module of all modules taken by student G00101224
db.student.find({_id:"G00101224"}, {"modules.module":1})
projection - only show the module of the modules field
{"_id":"G00101224", "modules":[{"module":"Databases"},{"module":"Java"}]}
In the example the document has only two field, but in reality it can be a very long document with much more information, so it makes sense to use relationships instead.
//save the modules to the docs collection db.docs.save({_id:"M100", module:"Databases"}) db.docs.save({_id:"M101", module:"Java"}) //save the students to the docs collection with references to the modules using the module _id fields. db.docs.save({_id:"G00101224", name:"Mary", modules["M100", "M101"]}) db.docs.save({_id:"G00205266", name:"Sarah", modules["M100", "M101"]})
Using the $lookup pipeline… 3)
Similar to a join in MySQL…
Performs a left outer join to an unsharded collection in the same database to filter in documents from the “joined” collection for processing. To each input document, the $lookup stage adds a new array field whose elements are the matching documents from the “joined” collection. The $lookup stage passes these reshaped documents to the next stage.
{ $lookup: { from: <collection to join>, localField: <field from the input documents>, foreignField: <field from the documents of the "from" collection>, as: <output array field> } }
from - specifies the collection in the same databases to perform the join with. The from collection cannot be sharded.
localField - The value to search for.
foreignField - The field to search for the value specified by localField.
as - The name of the output.
Return all documents including the complete referenced documents
db.docs.aggregate([{$lookup:{from:"docs", localField:"modules", foreignField: "_id", as:"Details"}}])
Features of embedded Documents
Features of Referenced Documents
Features of MongoDB
Features of MySQL
DATA ANALYTICS REFERENCE DOCUMENT |
|
---|---|
Document Title: | Applied Databases - Python |
Document No.: | 1553629016 |
Author(s): | Gerhard van der Linde, Rita Raher |
Contributor(s): |
REVISION HISTORY
Revision | Details of Modification(s) | Reason for modification | Date | By |
---|---|---|---|---|
0 | Draft release | Applied Databases - Python | 2019/03/26 19:36 | Gerhard van der Linde |
Employee ID | Name | Dept | Salary |
---|---|---|---|
100 | John | HR | 25500 |
101 | Mary | R&D | 44500 |
102 | Bill | R&D | 43000 |
103 | Tom | Sales | 40000 |
SELECT * from employees WHERE Salary > 42000; SELECT * from employees WHERE Salary < 30000;
my1stVariable = "Hello World" my2ndVariable = 1 print(my1stVariable) #Hello World my2ndVariable + 4 print(my2ndVariable) # 1
my2ndVariable = 1 x = my2ndVariable + 4 print(my2ndVariable) # 1 print(x) # 5 age = 21 age = age + 1 print(age) # 22
age = 17 if(age > 17): print("OK") print(finished) # finsied
age = 17 if(age > 17): print("OK") elif(age < 18): print("Nok") print(finished) #Nok # finished
temp = 37 if(temp >37): print("Hot") elif(temp <37): print("Cold") else: print("OK") print("Finished") # ok # finished
name = input("Enter name") # Tom email = name + "@gmit.ie" print(email) # Tom@gmit.ie
salary = input("Enter salary") # 30000 salary = int(salary) salary = salary + 100 print(salary)
i = 1 while(i <=5): print(i) i+=1 # i = i +1 # 1 # 2 # 3 # 4 # 5
answer = "5" while True: guess = input("Pick a number between 1 & 10") if(guess==answer): print("Correct!") break print("end")
myArr = ["Jan", "Feb", "March", "April"] print(myArr) #['Jan', 'Feb', 'March', 'April'] print(myArr[0]) # jan print(len(myArr)) #4
myArr = ["Jan", "Feb", "March", "April"] myArr.append("May") print(myArr) ##['Jan', 'Feb', 'March', 'April', "May"]
name = ["Tom", "John", "Mary", "Bob"] for name in names: print(name + "@gmit.ie") # Tom@gmit.ie # John@gmit.ie # Mary@gmit.ie # Bob@gmit.ie myArr = [1, 5, 12] for x in myArr: print(x+1) # 2 # 6 # 13 print(myArr) #[1, 5, 12]
def printMonths(): print("Jan, Feb, Mar") def printDays(): print("Mon, Tue, Wed") printDays() # Mon, Tue, Wed printMonths() # Jan, Feb, Mar
def printMonths(): print("Jan, Feb, Mar") def main(): printMonths() if __name__ =="__main__": # execute only if run as a script main()
print("Hello World") # Hello World print("Test") # Test s = "This is a string" print(len(s)) # 16
def checkAge(age): if age < 18: return "Too Young" return "Accepted" def main(): name = input("Enter:") age = int(input("Enter Age:")) print(name, "is", checkAge(age)) if __name__="__main__": # execute only if run as a script main()
A local variable is a variable that is given local scope. Local variable references in the function or block in which it is declared override the same variable name in the larger scope.
def checkAge(age): limit = 18 if age < limit: return "Too Young" return "Accepted" def main(): name = input("Enter:") age = int(input("Enter Age:")) print(name, "is", checkAge(age), limit) if __name__="__main__": # execute only if run as a script main()
def checkAge(age): limit = 18 if age < limit: return "Too Young" return "Accepted" def main(): limit= "Finished" name = input("Enter:") age = int(input("Enter Age:")) print(name, "is", checkAge(age), limit) if __name__="__main__": # execute only if run as a script main()
def incrementAge(age): age += 1 print(age) # 25 def main(): age = 24 incrementAge(age) print(age) # 24 if __name__="__main__": # execute only if run as a script main()
Declaring the variable outside of the function and using the keword global to make it a global variable
age = 24 def incrementAge(age): # access using the keyword "Global" global age age += 1 print(age) # 25 def main(): incrementAge(age) print(age) #25 if __name__="__main__": # execute only if run as a script main()
conn = pymysql.connect( "localhost", "root", "root", "school", cursorclass=pysql.cursors.DictCursor)
conn = pymysql.connect( "localhost", "root", "root", "school", cursorclass=pysql.cursors.DictCursor, password="root", host="localhost", db="school", port=3306)
query = "SELECT * FROM subject" with conn: cursor = conn.cursor() cursor.execute(query) subjects = cursor.fetchall() for s in sujects: print(s["Name"])
query = "SELECT * FROM subject WHERE teacher LIKE %s" with conn: cursor = conn.cursor() cursor.execute(query, ("Ms.%")) subjects = cursor.fetchall() for s in subjects: print(s["Name"])
ins = "Insert INTO subject (Name, Teacher, OnLeavingCert) VALUE(%s, %s, %s)" with conn: cursor = conn.cursor() cursor.execute(ins, ("Maths", "Ms.Jones", 1)) conn.commit() # commit to database to make a change
try and except block for error messages
ins = "Insert INTO subject (Name, Teacher, OnLeavingCert) VALUE(%s, %s, %s)" with conn: try: cursor = conn.cursor() cursor.execute(ins, ("Maths", "Ms.Jones", 1)) conn.commit() print("Insert successful") except: print("Maths already exists")
name = "Maths" teacher = "Ms.Jones" lc =1 with conn: try: cursor = conn.cursor() cursor.execute(query, (name, teacher, lc)) conn.commit() print("Insert Successful") except pymysql.err.InternalError as e: print("Internal Error", e) except pymysql.err.IntegrityError as e: print("Error", name, "already exists") except Exception as e: print("error", e)
query = "DELETE FROM subject WHERE name = %s" name = "Maths" with conn: try: cursor = conn.cursor() rowAffected = cursor.execute(query, (name)) conn.commit() if(rowsAffected ==0): print("Nothing deleted - ", name , "never existed") else: print(rowAffected, "row(s) deleted") except Exception as e: print("error", e)
query = "UPDATE subject SET teacher = %s WHERE NAME = %s" subject = "Maths" newTeacher = "Mr.Murphy" with conn: try: cursor = conn.cursor() rowsAffected = cursor.execute(query, (newTeacher, subject)) conn.commit() if(rowsAffected ==0): print(subject, "not updated") else: print(subject, "now taught by", newTeacher) except Exception as e: print("error", e)