In [1]:
import graphlab
In [2]:
# Path of the CSV file that is loaded into the `cars` SFrame.
CARS_CSV_PATH = "D:\\astrah.csv"

# NOTE(review): the parser log printed for this file reports `str` for the
# last two columns, and `cilindree` shows 'na' markers in the rows displayed
# further down -- presumably that is why it is not parsed as a number.
# Confirm before relying on `cilindree` as a numeric feature.
cars = graphlab.SFrame(CARS_CSV_PATH)
This non-commercial license of GraphLab Create for academic use is assigned to eduard.budacu@csie.ase.ro and will expire on November 20, 2017.
[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: C:\Users\EDUARD~1.BUD\AppData\Local\Temp\graphlab_server_1482416044.log.0
Finished parsing file D:\astrah.csv
Parsing completed. Parsed 29 lines in 0.022033 secs.
------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[long,long,long,long,str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
Finished parsing file D:\astrah.csv
Parsing completed. Parsed 29 lines in 0.024016 secs.
In [21]:
# Select the 'ipynb' Canvas target (presumably so that the visualisation is
# rendered inline in the notebook -- confirm against the GraphLab docs), then
# open the Canvas view of the whole `cars` SFrame. The target must be set
# before `show()` is called.
graphlab.canvas.set_target('ipynb')
cars.show()
In [4]:
# Fraction passed to random_split for the first (training) partition, and the
# fixed seed that makes the split repeatable across runs.
TRAIN_FRACTION = 0.8
SPLIT_SEED = 201

train_data, test_data = cars.random_split(TRAIN_FRACTION, seed=SPLIT_SEED)
In [5]:
# Display the training partition; the notebook prints only the head of the
# (lazily evaluated) SFrame.
train_data
Out[5]:
id an pret km cilindree motorizare
1 2010 4750 216000 1598 Benzina
2 2013 5575 139053 1686 Diesel
3 2009 4990 88433 1598 Benzina
4 2010 4990 200000 1700 Diesel
6 2010 4700 158000 na Diesel
7 2012 5700 112723 1598 Benzina
10 2008 3499 181000 1700 Diesel
11 2008 5200 159000 na Diesel
12 2008 4250 199000 1598 Benzina
13 2008 6250 92700 1600 Benzina
[? rows x 6 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.
In [6]:
# Baseline model: linear regression of the `pret` column on the single
# feature `km`, fitted on the training partition only.
km_model = graphlab.linear_regression.create(
    train_data,
    target="pret",
    features=['km'],
)
Linear regression:
--------------------------------------------------------
Number of examples          : 23
Number of features          : 1
Number of unpacked features : 1
Number of coefficients    : 2
Starting Newton Method
--------------------------------------------------------
+-----------+----------+--------------+--------------------+---------------+
| Iteration | Passes   | Elapsed Time | Training-max_error | Training-rmse |
+-----------+----------+--------------+--------------------+---------------+
| 1         | 2        | 1.000363     | 1638.014665        | 643.459195    |
+-----------+----------+--------------+--------------------+---------------+
SUCCESS: Optimal solution found.

In [7]:
# Mean of `pret` over the held-out rows, printed as a scale reference for the
# error metrics reported by evaluate(). The parenthesised single-argument form
# prints the same text under Python 2 and is also valid Python 3.
print(test_data['pret'].mean())
4415.0
In [8]:
# Score the one-feature model on the held-out rows and display the metrics
# dictionary as the cell result.
km_test_metrics = km_model.evaluate(test_data)
km_test_metrics
Out[8]:
{'max_error': 1571.2888496420983, 'rmse': 742.5982102133817}
In [9]:
import matplotlib.pyplot as plt
%matplotlib inline
In [10]:
# Held-out observations as dots and the one-feature model's predictions for
# the same rows as a line, both against `km`.
test_km = test_data['km']
km_predictions = km_model.predict(test_data)
plt.plot(test_km, test_data['pret'], '.',
         test_km, km_predictions, '-')
Out[10]:
[<matplotlib.lines.Line2D at 0x1f914f98>,
 <matplotlib.lines.Line2D at 0x1f927080>]
In [11]:
# Fetch the fitted coefficient table of the one-feature model and display it
# as the cell result.
km_coefficients = km_model.get('coefficients')
km_coefficients
Out[11]:
name index value stderr
(intercept) None 6222.06569108 466.540994888
km None -0.00882155305825 0.00249741113244
[2 rows x 4 columns]
In [12]:
# A single hand-written record, keyed by the dataset's column names, that is
# passed to the models' predict() in later cells.
# NOTE(review): `cilindree` is an int here, while the CSV parser log reports
# that column as str -- confirm that predict() handles the type mismatch as
# intended.
horia = {
    'pret': 4300,
    'an': 2008,
    'km': 101000,
    'cilindree': 1400,
    'motorizare': 'Benzina',
}
In [13]:
# Prediction of the one-feature model for the hand-written `horia` record.
# The parenthesised single-argument form prints the same text under Python 2
# and is also valid Python 3.
print(km_model.predict(horia))
[5331.088832197042]
In [14]:
# Column names used as inputs of the multi-feature model.
features = [
    'an',
    'km',
    'cilindree',
]
In [15]:
# Select the 'ipynb' Canvas target, then open the Canvas view restricted to
# the columns listed in `features`.
graphlab.canvas.set_target('ipynb')
feature_columns = cars[features]
feature_columns.show()
In [16]:
# Canvas scatter plot of `pret` (y axis) against `an` (x axis) for all rows.
cars.show(
    view='Scatter Plot',
    x='an',
    y='pret',
)
In [17]:
# Second model: linear regression of `pret` on every column in `features`,
# fitted on the training partition with no validation set.
# NOTE(review): the CSV parser log reports `cilindree` as a str column, so it
# is presumably handled as a categorical feature here (the training log lists
# 10 coefficients for 3 features) -- confirm that this is intended.
features_model = graphlab.linear_regression.create(
    train_data,
    target='pret',
    features=features,
    validation_set=None,
)
Linear regression:
--------------------------------------------------------
Number of examples          : 23
Number of features          : 3
In [18]:
# Prediction of the multi-feature model for the hand-written `horia` record.
# The parenthesised single-argument form prints the same text under Python 2
# and is also valid Python 3.
print(features_model.predict(horia))
Number of unpacked features : 3
Number of coefficients    : 10
Starting Newton Method
--------------------------------------------------------
[5275.1151718839565]
+-----------+----------+--------------+--------------------+---------------+
| Iteration | Passes   | Elapsed Time | Training-max_error | Training-rmse |
+-----------+----------+--------------+--------------------+---------------+
In [19]:
# Print the held-out metrics of the one-feature model, then those of the
# multi-feature model, for a side-by-side comparison. The parenthesised
# single-argument form prints the same text under Python 2 and is also valid
# Python 3.
print(km_model.evaluate(test_data))
print(features_model.evaluate(test_data))
| 1         | 2        | 0.001000     | 865.047939         | 410.579308    |
+-----------+----------+--------------+--------------------+---------------+
SUCCESS: Optimal solution found.
{'max_error': 1571.2888496420983, 'rmse': 742.5982102133817}

{'max_error': 1704.7236260885802, 'rmse': 914.7539562671418}
In [20]:
# Held-out observations (dots), the one-feature model's predictions for those
# rows (line), and the model's prediction for the `horia` record (single dot).
# The x-coordinate of that last point is read from the record itself rather
# than repeated as a literal, so it cannot drift from the `km` value that was
# actually fed to predict().
plt.plot(test_data['km'], test_data['pret'], '.',
         test_data['km'], km_model.predict(test_data), '-',
         horia['km'], km_model.predict(horia), '.')
Out[20]:
[<matplotlib.lines.Line2D at 0x1fc1ecf8>,
 <matplotlib.lines.Line2D at 0x1fc1eda0>,
 <matplotlib.lines.Line2D at 0x1fc2c470>]
In [ ]: