{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "460000"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Linear Model\n",
    "def evFiyatTahmini1(oda,salon,alan) :\n",
    "    return 80000 * oda + 100000*salon + alan * 500 + 20000\n",
    "\n",
    "evFiyatTahmini1(3,1,200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "600000"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Non-Linear (Tree-based)\n",
    "def evFiyatTahmini2(oda,salon,alan) :\n",
    "    if (oda <=1):\n",
    "        if(salon <2):\n",
    "            if(alan <100):\n",
    "                return 200000\n",
    "            elif (alan < 200):\n",
    "                return 280000\n",
    "        else:\n",
    "            if(alan <100):\n",
    "                return 300000\n",
    "            elif (alan < 200):\n",
    "                return 480000\n",
    "    else:\n",
    "        return 600000\n",
    "evFiyatTahmini2(3,1,200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+-----+----+------+---------------+\n",
      "|oda|salon|alan| label|       features|\n",
      "+---+-----+----+------+---------------+\n",
      "|  3|    1| 200|420000|[3.0,1.0,200.0]|\n",
      "|  2|    1| 120|200000|[2.0,1.0,120.0]|\n",
      "|  1|    1|  80|175000| [1.0,1.0,80.0]|\n",
      "|  3|    1| 180|390000|[3.0,1.0,180.0]|\n",
      "|  3|    1| 160|350000|[3.0,1.0,160.0]|\n",
      "+---+-----+----+------+---------------+\n",
      "\n",
      "Bitti\n",
      "Çarpanlar  [-1571.4327382532042,0.0,2292.8572123661797]\n",
      "Sabit Sayi  -28571.428858386877\n"
     ]
    }
   ],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "from pyspark.ml.feature import VectorAssembler\n",
    "from pyspark.ml.regression import LinearRegression\n",
    "\n",
    "spark = SparkSession.Builder().appName(\"DataFrame Introduction\").getOrCreate()\n",
    "evDF = spark.read.option(\"delimiter\",\";\").option('inferSchema','true').option(\"header\",\"true\").csv(\"datasets/ev-fiyatlari.csv\")\n",
    "evDF = evDF.withColumnRenamed('fiyat','label')\n",
    "vec = VectorAssembler(inputCols=['oda','salon','alan'],outputCol='features')\n",
    "evDF = vec.transform(evDF)\n",
    "\n",
    "evDF.show()\n",
    "lr = LinearRegression()\n",
    "lrModel = lr.fit(evDF)\n",
    "print('Bitti')\n",
    "\n",
    "print(\"Çarpanlar \", lrModel.coefficients)\n",
    "print(\"Sabit Sayi \",lrModel.intercept)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "379276.0"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Normalde bu metodu elle yazmamıza gerek yoktur\n",
    "#Prediction yapmak için sadece lrModel.transform(yenievler) metodunu çağırmamız yeterlidir\n",
    "#Bunu sadece görelim diye yaptık\n",
    "def evFiyatTahmini3(oda,salon,alan) :\n",
    "    return -1571 * oda + 0.0*salon + 2292 * alan + -28571\n",
    "\n",
    "evFiyatTahmini3(3,1,180)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------------+------+\n",
      "|       features| label|\n",
      "+---------------+------+\n",
      "|[3.0,1.0,200.0]|420000|\n",
      "|[2.0,1.0,120.0]|200000|\n",
      "| [1.0,1.0,80.0]|175000|\n",
      "|[3.0,1.0,180.0]|390000|\n",
      "|[3.0,1.0,160.0]|350000|\n",
      "+---------------+------+\n",
      "\n",
      "DecisionTreeRegressionModel: uid=DecisionTreeRegressor_fc2dee4131fd, depth=3, numNodes=9, numFeatures=3\n",
      "  If (feature 0 <= 2.5)\n",
      "   If (feature 0 <= 1.5)\n",
      "    Predict: 175000.0\n",
      "   Else (feature 0 > 1.5)\n",
      "    Predict: 200000.0\n",
      "  Else (feature 0 > 2.5)\n",
      "   If (feature 2 <= 170.0)\n",
      "    Predict: 350000.0\n",
      "   Else (feature 2 > 170.0)\n",
      "    If (feature 2 <= 190.0)\n",
      "     Predict: 390000.0\n",
      "    Else (feature 2 > 190.0)\n",
      "     Predict: 420000.0\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "from pyspark.ml.feature import VectorAssembler\n",
    "from pyspark.ml.regression import DecisionTreeRegressor\n",
    "\n",
    "spark = SparkSession.Builder().appName(\"DataFrame Introduction\").getOrCreate()\n",
    "evDF = spark.read.option(\"delimiter\",\";\").option('inferSchema','true').option(\"header\",\"true\").csv(\"datasets/ev-fiyatlari.csv\")\n",
    "evDF = evDF.withColumnRenamed('fiyat','label')\n",
    "vec = VectorAssembler(inputCols=['oda','salon','alan'],outputCol='features')\n",
    "evDF = vec.transform(evDF)\n",
    "evDF = evDF.select('features','label')\n",
    "evDF.show()\n",
    "dt = DecisionTreeRegressor()\n",
    "dtmodel = dt.fit(evDF)\n",
    "print(dtmodel.toDebugString)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+-----+----+---------------+\n",
      "|oda|salon|alan|       features|\n",
      "+---+-----+----+---------------+\n",
      "|  2|    1| 110|[2.0,1.0,110.0]|\n",
      "|  1|    1|  78| [1.0,1.0,78.0]|\n",
      "|  3|    1| 187|[3.0,1.0,187.0]|\n",
      "+---+-----+----+---------------+\n",
      "\n",
      "+---+-----+----+---------------+------------------+\n",
      "|oda|salon|alan|       features|        prediction|\n",
      "+---+-----+----+---------------+------------------+\n",
      "|  2|    1| 110|[2.0,1.0,110.0]|220499.99902538647|\n",
      "|  1|    1|  78| [1.0,1.0,78.0]|148700.00096792192|\n",
      "|  3|    1| 187|[3.0,1.0,187.0]|395478.57163932914|\n",
      "+---+-----+----+---------------+------------------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#Bulduğumuz ML Modeli yeni verilere uyguayacağız\n",
    "yeniDF = spark.read.option(\"delimiter\",\";\").option('inferSchema','true').option(\"header\",\"true\").csv(\"datasets/yeni-evler.csv\")\n",
    "yeniDF = vec.transform(yeniDF)\n",
    "yeniDF.show()\n",
    "tahminler1DF = lrModel.transform(yeniDF)\n",
    "tahminler1DF.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+-----+----+---------------+----------+\n",
      "|oda|salon|alan|       features|prediction|\n",
      "+---+-----+----+---------------+----------+\n",
      "|  2|    1| 110|[2.0,1.0,110.0]|  200000.0|\n",
      "|  1|    1|  78| [1.0,1.0,78.0]|  175000.0|\n",
      "|  3|    1| 187|[3.0,1.0,187.0]|  390000.0|\n",
      "+---+-----+----+---------------+----------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "tahminler2DF = dtmodel.transform(yeniDF)\n",
    "tahminler2DF.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#ML projenin % 80 feature ile uğraşmaktır (veri (null değerler ..vs) temizleme, ilgili kolonları seçme, kolonları eleme ...vs)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
