-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
109 lines (96 loc) · 2.02 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from sklearn.datasets import load_iris
import pandas as pd
from pandasql import sqldf
from pandasql import load_meat, load_births
import re
births = load_births()
meat = load_meat()
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
iris_df.columns = [re.sub("[() ]", "", col) for col in iris_df.columns]
print(sqldf("SELECT * FROM iris_df LIMIT 10;", locals()))
print(sqldf("SELECT sepalwidthcm, species FROM iris_df LIMIT 10;", locals()))
q = """
select
species
, avg(sepalwidthcm)
, min(sepalwidthcm)
, max(sepalwidthcm)
from
iris_df
group by
species;
"""
print("*" * 80)
print("aggregation")
print("-" * 80)
print(q)
print(sqldf(q, locals()))
def pysqldf(q):
"add this to your script if you get tired of calling locals()"
return sqldf(q, globals())
print("*" * 80)
print("calling from a helper function")
print('''def pysqldf(q):)
"add this to your script if you get tired of calling locals()"
return sqldf(q, globals())''')
print("-" * 80)
print(q)
print(pysqldf(q))
q = """
select
a.*
from
iris_df a
inner join
iris_df b
on a.species = b.species
limit 10;
"""
print("*" * 80)
print("joins")
print("-" * 80)
print(q)
print(pysqldf(q))
q = """
select
*
from
iris_df
where
species = 'virginica'
and sepallengthcm > 7.7;
"""
print("*" * 80)
print("where clause")
print("-" * 80)
print(q)
print(pysqldf(q))
iris_df['id'] = range(len(iris_df))
q = """
select
*
from
iris_df
where
id in (select id from iris_df where sepalwidthcm*sepallengthcm > 25);
"""
print("*" * 80)
print("subqueries")
print("-" * 80)
print(q)
print(pysqldf(q))
q = """
SELECT
m.*
, b.births
FROM
meat m
INNER JOIN
births b
on m.date = b.date
ORDER BY
m.date;
"""
print(pysqldf(q).head())