Analiza Productivitate Top 400 companii de IT din Romania

sursa de date: Ziarul Financiar, Anuarul ZF „Cei mai mari jucători din IT şi telecom 2017“

realizat de: Eduard Budacu

In [119]:
import pandas
data = pandas.read_csv("roit.csv")
In [120]:
data.columns
Out[120]:
Index(['ID', 'FIRMA', 'JUDET', 'AFACERI 2016 (MIL. LEI)', 'VARIATIE_CA',
       'PROFIT NET 2016(MIL. LEI)', 'VARIATIE_PROFIT',
       'MARJA PROFITULUI NET 2016', 'NR.MEDIU ANGAJATI 2016', 'VARIATIE_NA',
       'DATORII LA 31.12.2016 (MIL. LEI)', 'VARIATIE_DAT'],
      dtype='object')
In [121]:
data['NR.MEDIU ANGAJATI 2016'] = pandas.to_numeric(data['NR.MEDIU ANGAJATI 2016'], errors="coerce")
In [122]:
data['PRODUCTIVITATE'] = data['AFACERI 2016 (MIL. LEI)'] / data['NR.MEDIU ANGAJATI 2016']
In [123]:
data['PRODUCTIVITATE_LUNARA'] = data['PRODUCTIVITATE'] / 12
In [202]:
data['PRODUCTIVITATE_EUR'] = (data['PRODUCTIVITATE'] / 4.5) * 1000000
In [204]:
data['PRODUCTIVITATE_EUR'].mean()
Out[204]:
101272.95597703689
In [126]:
import numpy as np
data = data.replace([np.inf, -np.inf], np.nan).dropna()
In [137]:
data[['FIRMA', 'JUDET', 'PRODUCTIVITATE', 'NR.MEDIU ANGAJATI 2016', 'PROFIT NET 2016(MIL. LEI)']].sort_values('PRODUCTIVITATE', ascending=False)
Out[137]:
FIRMA JUDET PRODUCTIVITATE NR.MEDIU ANGAJATI 2016 PROFIT NET 2016(MIL. LEI)
93 Conplan România Services Sibiu 22.500000 1 3.56
233 M Software Timis Timisoara 9.000000 1 0.1
254 Creatiove SEO Constanta 8.100000 1 0.02
42 Zebra Pay Bucuresti 7.471429 7 0.19
115 Planitright Consult Bucuresti 5.800000 3 0.01
61 SoftwareOne Licensing Experts Bucuresti 4.914286 7 0.69
166 Ati Studious A.P.P.S. Brasov 4.033333 3 0.04
77 People For Business Cluj 3.928571 7 0.33
32 Netopia Bucuresti 3.436842 19 1.17
33 TeamNet Project Management Solutions Bucuresti 2.679167 24 0.11
29 Ymens Teamnet Bucuresti 2.116216 37 1.44
92 Fis România Bucuresti 1.753846 13 0.14
253 Piconet Timisoara 1.620000 5 0.28%
255 Head Hunting IT Services Bucuresti 1.600000 5 0.08
124 Kapsch Carriercom România Bucuresti 1.600000 10 3.16
316 Soprano Design Cluj 1.400000 4 0.17
183 Bvfon România Bucuresti 1.387500 8 3.84
263 Smarttel Bucuresti 1.266667 6 1.76
25 PowerTech IT Services Bucuresti 1.198333 72 2.48
23 TeamNet Solutions International Bucuresti 1.112500 88 -1.3
114 Ro Planet Cluj 1.029412 17 0.16
64 Simultec Ilfov 0.937143 35 1.1
122 Dataware Consulting Bucuresti 0.900000 18 0.22
138 Saga Software Bucuresti 0.852941 17 10.3
350 WG Test România Timisoara 0.816667 6 0.84
120 Evision Cluj 0.815000 20 0.41
277 Amiq Eda Bucuresti 0.800000 9 4.13
69 Sincro Soft Dolj 0.773684 38 18.99
104 Logic IT Consult Bucuresti 0.757692 26 6.33
369 Trustic Desing Bucuresti 0.750000 6 0.81
... ... ... ... ... ...
299 Servus Tech Soft Cluj 0.108333 60 0.32
118 Basware Iasi 0.106918 159 1.24
152 Art Soft Consult Cluj 0.104688 128 0.49
177 Ro Software House Dolj 0.103670 109 0.67
363 Wirtek Cluj 0.102174 46 0.05
199 Indeco Soft Maramures 0.102020 99 1.34
324 Ecrion Software International Bucuresti 0.101852 54 -0.04
302 Prodinf Software Arges 0.100000 64 0.02
325 Ciel România Bucuresti 0.100000 55 0.08
109 Sintec Media Dolj 0.099487 195 2.78
19 Gameloft Bucuresti 0.099151 1060 4.28
68 Toluna România Timisoara 0.098350 303 2.26
94 Reea Maramures 0.096552 232 2.42
322 Atisimo Prahova 0.094828 58 1.47
354 Sinoptics Brasov 0.094118 51 0.16
246 Xoomworks Development România Cluj 0.091489 94 -0.15
298 Industrial Software Sibiu 0.089041 73 0.33
159 Hamerun Ciero Bucuresti 0.088889 144 0.48
323 Mondosoft Valcea 0.088710 62 0.17
314 Lynx Solutions MS 0.087692 65 0.98
86 Informatica Feroviara Bucuresti 0.087500 288 0.4
266 Intellient Software Systems Timisoara 0.087209 86 0.03
391 Quantic Lab Cluj 0.081132 53 0.64
360 Expret Network Iasi 0.081034 58 0.76
308 Neusoft EDC Cluj 0.074390 82 -1.73
89 MSG Systems România Cluj 0.073353 334 4.14
380 Mios România Iasi 0.073333 60 0.13
164 Flame MS 0.051883 239 -0.02
108 Evozone Systems Cluj 0.044622 437 -4.43
175 Arezzo Sky Bucuresti 0.044358 257 0.23

383 rows × 5 columns

In [82]:
data[['FIRMA', 'PRODUCTIVITATE', 'NR.MEDIU ANGAJATI 2016']].to_csv('productivitate.csv')
In [85]:
data['NR.MEDIU ANGAJATI 2016'].sum()
Out[85]:
46678.0
In [154]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
 
In [152]:
plt.plot(data['NR.MEDIU ANGAJATI 2016'], data['PRODUCTIVITATE'], 'ro')
Out[152]:
[<matplotlib.lines.Line2D at 0x7f067728d278>]
In [130]:
data['NR.MEDIU ANGAJATI 2016'].describe()
Out[130]:
count     383.000000
mean      121.874674
std       268.720811
min         1.000000
25%        30.000000
50%        53.000000
75%        99.000000
max      3653.000000
Name: NR.MEDIU ANGAJATI 2016, dtype: float64
In [144]:
agg_county = data.groupby('JUDET').agg({'FIRMA':'count', 'NR.MEDIU ANGAJATI 2016':'sum', 'AFACERI 2016 (MIL. LEI)':'sum'}).reset_index()
In [146]:
agg_county['PRODUCTIVITATE'] = agg_county['AFACERI 2016 (MIL. LEI)'] / agg_county['NR.MEDIU ANGAJATI 2016']
In [147]:
agg_county
Out[147]:
JUDET FIRMA AFACERI 2016 (MIL. LEI) NR.MEDIU ANGAJATI 2016 PRODUCTIVITATE
0 - 1 6.10 44 0.138636
1 Arges 1 6.40 64 0.100000
2 BN 1 5.00 22 0.227273
3 Bihor 3 34.90 187 0.186631
4 Botosani 1 15.50 27 0.574074
5 Brasov 14 268.10 1556 0.172301
6 Bucuresi 1 7.60 23 0.330435
7 Bucuresti 185 5952.84 23779 0.250340
8 Cluj 66 2079.10 11214 0.185402
9 Constanta 1 8.10 1 8.100000
10 Dolj 9 177.10 926 0.191253
11 Galati 1 52.40 207 0.253140
12 Hunedoara 1 13.00 53 0.245283
13 ILFOV 1 149.50 739 0.202300
14 IS 1 107.20 566 0.189399
15 Iasi 20 216.40 1390 0.155683
16 Ilfov 9 245.70 1143 0.214961
17 MS 4 41.40 481 0.086071
18 Maramures 5 65.60 426 0.153991
19 NT 1 11.60 79 0.146835
20 OT 1 7.40 54 0.137037
21 Prahova 4 23.40 149 0.157047
22 SJ 1 5.50 30 0.183333
23 SM 1 7.70 11 0.700000
24 Sibiu 14 178.40 831 0.214681
25 Timis 1 5.10 30 0.170000
26 Timisoara 34 513.20 2584 0.198607
27 Valcea 1 5.50 62 0.088710
In [166]:
plt.boxplot(data['NR.MEDIU ANGAJATI 2016'], showfliers=False)
Out[166]:
{'boxes': [<matplotlib.lines.Line2D at 0x7f0676fe25c0>],
 'caps': [<matplotlib.lines.Line2D at 0x7f0676fe7f60>,
  <matplotlib.lines.Line2D at 0x7f0676fec7b8>],
 'fliers': [],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0x7f0676fec8d0>],
 'whiskers': [<matplotlib.lines.Line2D at 0x7f0676fe2ef0>,
  <matplotlib.lines.Line2D at 0x7f0676fe7748>]}
In [164]:
data.groupby('JUDET')['NR.MEDIU ANGAJATI 2016']
Out[164]:
JUDET
-               44
Arges           64
BN              22
Bihor          187
Botosani        27
Brasov        1556
Bucuresi        23
Bucuresti    23779
Cluj         11214
Constanta        1
Dolj           926
Galati         207
Hunedoara       53
ILFOV          739
IS             566
Iasi          1390
Ilfov         1143
MS             481
Maramures      426
NT              79
OT              54
Prahova        149
SJ              30
SM              11
Sibiu          831
Timis           30
Timisoara     2584
Valcea          62
Name: NR.MEDIU ANGAJATI 2016, dtype: float64
In [211]:
data.groupby('JUDET')['FIRMA'].count()
Out[211]:
JUDET
-              1
Arges          1
BN             1
Bihor          3
Botosani       1
Brasov        14
Bucuresi       1
Bucuresti    185
Cluj          66
Constanta      1
Dolj           9
Galati         1
Hunedoara      1
ILFOV          1
IS             1
Iasi          20
Ilfov          9
MS             4
Maramures      5
NT             1
OT             1
Prahova        4
SJ             1
SM             1
Sibiu         14
Timis          1
Timisoara     34
Valcea         1
Name: FIRMA, dtype: int64
In [191]:
sns.boxplot(x='JUDET', y='NR.MEDIU ANGAJATI 2016', data=data[data['JUDET'].isin(['Bucuresti', 'Cluj', 'Brasov', 'Iasi', 'Timisoara'])], showfliers=False)
/opt/ds/lib/python3.4/site-packages/matplotlib/__init__.py:892: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
Out[191]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f066df2a7b8>
In [169]:
import seaborn as sns
/opt/ds/lib/python3.4/site-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
In [195]:
sns.boxplot(x='JUDET', y='PRODUCTIVITATE', data=data[data['JUDET'].isin(['Bucuresti', 'Cluj', 'Brasov', 'Iasi', 'Timisoara'])], showfliers=False)
/opt/ds/lib/python3.4/site-packages/matplotlib/__init__.py:892: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
Out[195]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f066df078d0>
In [193]:
sns.boxplot(x='JUDET', y='AFACERI 2016 (MIL. LEI)', data=data[data['JUDET'].isin(['Bucuresti', 'Cluj', 'Brasov', 'Iasi', 'Timisoara'])], showfliers=False)
/opt/ds/lib/python3.4/site-packages/matplotlib/__init__.py:892: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
Out[193]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f066dd7a9b0>
In [205]:
ax = sns.boxplot(x='JUDET', y='PRODUCTIVITATE_EUR', data=data[data['JUDET'].isin(['Bucuresti', 'Cluj', 'Brasov', 'Iasi', 'Timisoara'])], showfliers=False)
/opt/ds/lib/python3.4/site-packages/matplotlib/__init__.py:892: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
In [200]:
sns.swarmplot(x="PRODUCTIVITATE", data=data)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-200-e9d03bc14504> in <module>()
----> 1 sns.swarmplot(x="PRODUCTIVITATE", data=data)

AttributeError: 'module' object has no attribute 'swarmplot'
In [206]:
data['PRODUCTIVITATE_EUR_MONTH'] = data['PRODUCTIVITATE_EUR'] / 12 
In [207]:
ax = sns.boxplot(x='JUDET', y='PRODUCTIVITATE_EUR_MONTH', data=data[data['JUDET'].isin(['Bucuresti', 'Cluj', 'Brasov', 'Iasi', 'Timisoara'])], showfliers=False)
/opt/ds/lib/python3.4/site-packages/matplotlib/__init__.py:892: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
In [208]:
ax = sns.boxplot(y='PRODUCTIVITATE_EUR_MONTH', data=data[data['JUDET'].isin(['Bucuresti', 'Cluj', 'Brasov', 'Iasi', 'Timisoara'])], showfliers=False)
/opt/ds/lib/python3.4/site-packages/matplotlib/__init__.py:892: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
In [214]:
data['VARIATIE_NA'].max()
Out[214]:
'96'
In [ ]: