{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Pandas IO.ipynb",
      "version": "0.3.2",
      "provenance": [],
      "collapsed_sections": [
        "OV2e4bJ12eOi",
        "NXuU1IUz2jnJ",
        "rWFG7JlcAEwc"
      ]
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "metadata": {
        "id": "OV2e4bJ12eOi",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "# Criando os nomes"
      ]
    },
    {
      "metadata": {
        "id": "7f53UWACvaCX",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "import pandas as pd"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "XxOW1IZZwV-K",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "nomes_m = pd.read_json(\"https://servicodados.ibge.gov.br/api/v1/censos/nomes/ranking?qtd=200&sexo=m\")\n",
        "nomes_f = pd.read_json(\"https://servicodados.ibge.gov.br/api/v1/censos/nomes/ranking?qtd=200&sexo=f\")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "glPFYAfIwqag",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        },
        "outputId": "84938ef7-4eb6-46ab-95a7-53fedd02c1bb"
      },
      "cell_type": "code",
      "source": [
        "print(\"Quantidade de nomes: \" + str(len(nomes_f) + len(nomes_m)))"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Quantidade de nomes: 400\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "ZhxSJe14w6t8",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "frames = [nomes_f, nomes_m]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "TexNJ2VoxotO",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 2196
        },
        "outputId": "10675162-9d77-4b1c-e6b4-12adf1c97949"
      },
      "cell_type": "code",
      "source": [
        "frames"
      ],
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[         freq        nome  rank  regiao sexo\n",
              " 0    11694738       MARIA     1       0    F\n",
              " 1     3079729         ANA     2       0    F\n",
              " 2      721637   FRANCISCA     3       0    F\n",
              " 3      588783     ANTONIA     4       0    F\n",
              " 4      565621     ADRIANA     5       0    F\n",
              " 5      562589     JULIANA     6       0    F\n",
              " 6      551855      MARCIA     7       0    F\n",
              " 7      531607    FERNANDA     8       0    F\n",
              " 8      529446    PATRICIA     9       0    F\n",
              " 9      509869       ALINE    10       0    F\n",
              " 10     479230      SANDRA    11       0    F\n",
              " 11     469851      CAMILA    12       0    F\n",
              " 12     464624      AMANDA    13       0    F\n",
              " 13     460770       BRUNA    14       0    F\n",
              " 14     456472     JESSICA    15       0    F\n",
              " 15     434056     LETICIA    16       0    F\n",
              " 16     430067       JULIA    17       0    F\n",
              " 17     429769     LUCIANA    18       0    F\n",
              " 18     417512     VANESSA    19       0    F\n",
              " 19     381778     MARIANA    20       0    F\n",
              " 20     378807    GABRIELA    21       0    F\n",
              " 21     376202        VERA    22       0    F\n",
              " 22     366797     VITORIA    23       0    F\n",
              " 23     360523     LARISSA    24       0    F\n",
              " 24     358175     CLAUDIA    25       0    F\n",
              " 25     354980     BEATRIZ    26       0    F\n",
              " 26     354720        RITA    27       0    F\n",
              " 27     354625       LUANA    28       0    F\n",
              " 28     354487       SONIA    29       0    F\n",
              " 29     345195      RENATA    30       0    F\n",
              " ..        ...         ...   ...     ...  ...\n",
              " 170     93355   MARINALVA   171       0    F\n",
              " 171     92470   TERESINHA   172       0    F\n",
              " 172     92065      REBECA   173       0    F\n",
              " 173     91752     GERALDA   174       0    F\n",
              " 174     90779    CLEONICE   175       0    F\n",
              " 175     89381   ALEXANDRA   176       0    F\n",
              " 176     88631        MARA   177       0    F\n",
              " 177     88574     JENIFER   178       0    F\n",
              " 178     88505  ELIZANGELA   179       0    F\n",
              " 179     88064     LILIANE   180       0    F\n",
              " 180     88023       LIDIA   181       0    F\n",
              " 181     87953     IRACEMA   182       0    F\n",
              " 182     86963     LIDIANE   183       0    F\n",
              " 183     86816       IRACI   184       0    F\n",
              " 184     86734     MARILIA   185       0    F\n",
              " 185     85939      SHEILA   186       0    F\n",
              " 186     85322        INES   187       0    F\n",
              " 187     85317      NAIARA   188       0    F\n",
              " 188     84237     POLIANA   189       0    F\n",
              " 189     83485       DALVA   190       0    F\n",
              " 190     83288      LURDES   191       0    F\n",
              " 191     83045      TERESA   192       0    F\n",
              " 192     82927       ZILDA   193       0    F\n",
              " 193     82838    LUCILENE   194       0    F\n",
              " 194     82626       CLARA   195       0    F\n",
              " 195     82203      ROSANE   196       0    F\n",
              " 196     81878     IVONETE   197       0    F\n",
              " 197     81230     LUCIANE   198       0    F\n",
              " 198     81100     ISADORA   199       0    F\n",
              " 199     80128       NILZA   200       0    F\n",
              " \n",
              " [200 rows x 5 columns],         freq       nome  rank  regiao sexo\n",
              " 0    5732508       JOSE     1       0    M\n",
              " 1    2971935       JOAO     2       0    M\n",
              " 2    2567494    ANTONIO     3       0    M\n",
              " 3    1765197  FRANCISCO     4       0    M\n",
              " 4    1483121     CARLOS     5       0    M\n",
              " 5    1417907      PAULO     6       0    M\n",
              " 6    1213557      PEDRO     7       0    M\n",
              " 7    1116818      LUCAS     8       0    M\n",
              " 8    1102927       LUIZ     9       0    M\n",
              " 9    1101126     MARCOS    10       0    M\n",
              " 10    931530       LUIS    11       0    M\n",
              " 11    922744    GABRIEL    12       0    M\n",
              " 12    814709     RAFAEL    13       0    M\n",
              " 13    706527     DANIEL    14       0    M\n",
              " 14    690098    MARCELO    15       0    M\n",
              " 15    663271      BRUNO    16       0    M\n",
              " 16    628539    EDUARDO    17       0    M\n",
              " 17    615924     FELIPE    18       0    M\n",
              " 18    611174   RAIMUNDO    19       0    M\n",
              " 19    598825    RODRIGO    20       0    M\n",
              " 20    590924     MANOEL    21       0    M\n",
              " 21    583962     MATEUS    22       0    M\n",
              " 22    578975      ANDRE    23       0    M\n",
              " 23    551776   FERNANDO    24       0    M\n",
              " 24    545581      FABIO    25       0    M\n",
              " 25    544258   LEONARDO    26       0    M\n",
              " 26    537598    GUSTAVO    27       0    M\n",
              " 27    525028  GUILHERME    28       0    M\n",
              " 28    506483    LEANDRO    29       0    M\n",
              " 29    492149      TIAGO    30       0    M\n",
              " ..       ...        ...   ...     ...  ...\n",
              " 170    76472  ALEXANDRO   171       0    M\n",
              " 171    75699   VALDEMIR   172       0    M\n",
              " 172    75619  WANDERSON   173       0    M\n",
              " 173    75076    JAILSON   174       0    M\n",
              " 174    75029      DENIS   175       0    M\n",
              " 175    74729       RIAN   176       0    M\n",
              " 176    74693      CELIO   177       0    M\n",
              " 177    74264  CLAUDINEI   178       0    M\n",
              " 178    74095   DENILSON   179       0    M\n",
              " 179    73618       IAGO   180       0    M\n",
              " 180    73467       EDER   181       0    M\n",
              " 181    73204       YURI   182       0    M\n",
              " 182    72715    CHARLES   183       0    M\n",
              " 183    72507     MARLON   184       0    M\n",
              " 184    72186     ALISON   185       0    M\n",
              " 185    71812    JONATAN   186       0    M\n",
              " 186    71759       ERIC   187       0    M\n",
              " 187    71566      ELTON   188       0    M\n",
              " 188    71383   VALDECIR   189       0    M\n",
              " 189    70454     WESLEI   190       0    M\n",
              " 190    69835   NATANAEL   191       0    M\n",
              " 191    69692      ERICK   192       0    M\n",
              " 192    68920    AGNALDO   193       0    M\n",
              " 193    68526      KAUAN   194       0    M\n",
              " 194    67758      ODAIR   195       0    M\n",
              " 195    67207     WALTER   196       0    M\n",
              " 196    66669    ARLINDO   197       0    M\n",
              " 197    65736    MICHAEL   198       0    M\n",
              " 198    65444     ALVARO   199       0    M\n",
              " 199    65435    GEOVANE   200       0    M\n",
              " \n",
              " [200 rows x 5 columns]]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 5
        }
      ]
    },
    {
      "metadata": {
        "id": "swTtxsp3xhmg",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "nomes = pd.concat(frames)['nome'].to_frame()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "jYI_nw1bx0ZT",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 224
        },
        "outputId": "d3f48c41-0de1-41ca-db72-178ab1b7fa10"
      },
      "cell_type": "code",
      "source": [
        "nomes.sample(5)"
      ],
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>nome</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>104</th>\n",
              "      <td>EVA</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>92</th>\n",
              "      <td>DIOGO</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>141</th>\n",
              "      <td>NEIDE</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>47</th>\n",
              "      <td>LUCIA</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>196</th>\n",
              "      <td>IVONETE</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "        nome\n",
              "104      EVA\n",
              "92     DIOGO\n",
              "141    NEIDE\n",
              "47     LUCIA\n",
              "196  IVONETE"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 7
        }
      ]
    },
    {
      "metadata": {
        "id": "NXuU1IUz2jnJ",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "# Incluindo ID dos alunos"
      ]
    },
    {
      "metadata": {
        "id": "QsOlbPOH2Sxm",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "np.random.seed(123)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "lOKUmPXt2zKe",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "d41717e4-774f-496d-a905-a8ca26898299"
      },
      "cell_type": "code",
      "source": [
        "total_alunos = len(nomes)\n",
        "total_alunos"
      ],
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "400"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 9
        }
      ]
    },
    {
      "metadata": {
        "id": "NSZZyQD627Fn",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "nomes[\"id_aluno\"] = np.random.permutation(total_alunos) + 1"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "phR4SHAt3E_3",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 195
        },
        "outputId": "ae5d034b-5dd0-4a9b-931a-f5985d88936e"
      },
      "cell_type": "code",
      "source": [
        "nomes.sample(5)"
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>nome</th>\n",
              "      <th>id_aluno</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>188</th>\n",
              "      <td>POLIANA</td>\n",
              "      <td>369</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>57</th>\n",
              "      <td>DAIANE</td>\n",
              "      <td>303</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>FRANCISCO</td>\n",
              "      <td>382</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>182</th>\n",
              "      <td>CHARLES</td>\n",
              "      <td>33</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>30</th>\n",
              "      <td>ANDERSON</td>\n",
              "      <td>111</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "          nome  id_aluno\n",
              "188    POLIANA       369\n",
              "57      DAIANE       303\n",
              "3    FRANCISCO       382\n",
              "182    CHARLES        33\n",
              "30    ANDERSON       111"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 11
        }
      ]
    },
    {
      "metadata": {
        "id": "-GFdbNbI3Gyn",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "dominios = ['@dominiodoemail.com.br', '@servicodoemail.com']\n",
        "nomes['dominio'] = np.random.choice(dominios, total_alunos)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "_CCuE4e9355N",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 195
        },
        "outputId": "f8586cb7-55ea-42df-f105-ad556af61af6"
      },
      "cell_type": "code",
      "source": [
        "nomes.sample(5)"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>nome</th>\n",
              "      <th>id_aluno</th>\n",
              "      <th>dominio</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>84</th>\n",
              "      <td>SEVERINO</td>\n",
              "      <td>78</td>\n",
              "      <td>@dominiodoemail.com.br</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>JOSE</td>\n",
              "      <td>331</td>\n",
              "      <td>@servicodoemail.com</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>179</th>\n",
              "      <td>IAGO</td>\n",
              "      <td>340</td>\n",
              "      <td>@dominiodoemail.com.br</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>SANDRA</td>\n",
              "      <td>174</td>\n",
              "      <td>@dominiodoemail.com.br</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>171</th>\n",
              "      <td>TERESINHA</td>\n",
              "      <td>45</td>\n",
              "      <td>@dominiodoemail.com.br</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "          nome  id_aluno                 dominio\n",
              "84    SEVERINO        78  @dominiodoemail.com.br\n",
              "0         JOSE       331     @servicodoemail.com\n",
              "179       IAGO       340  @dominiodoemail.com.br\n",
              "10      SANDRA       174  @dominiodoemail.com.br\n",
              "171  TERESINHA        45  @dominiodoemail.com.br"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 13
        }
      ]
    },
    {
      "metadata": {
        "id": "6G24smfL363Q",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "nomes['email'] = nomes.nome.str.cat(nomes.dominio).str.lower()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "Sn7M84lw4Nil",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 195
        },
        "outputId": "c608f5e3-5005-4207-c201-2ee21afa5fc5"
      },
      "cell_type": "code",
      "source": [
        "nomes.sample(5)"
      ],
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>nome</th>\n",
              "      <th>id_aluno</th>\n",
              "      <th>dominio</th>\n",
              "      <th>email</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>114</th>\n",
              "      <td>ARTHUR</td>\n",
              "      <td>320</td>\n",
              "      <td>@dominiodoemail.com.br</td>\n",
              "      <td>arthur@dominiodoemail.com.br</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>57</th>\n",
              "      <td>DAIANE</td>\n",
              "      <td>303</td>\n",
              "      <td>@servicodoemail.com</td>\n",
              "      <td>daiane@servicodoemail.com</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>85</th>\n",
              "      <td>HELENA</td>\n",
              "      <td>221</td>\n",
              "      <td>@dominiodoemail.com.br</td>\n",
              "      <td>helena@dominiodoemail.com.br</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>183</th>\n",
              "      <td>IRACI</td>\n",
              "      <td>8</td>\n",
              "      <td>@dominiodoemail.com.br</td>\n",
              "      <td>iraci@dominiodoemail.com.br</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>189</th>\n",
              "      <td>DALVA</td>\n",
              "      <td>137</td>\n",
              "      <td>@dominiodoemail.com.br</td>\n",
              "      <td>dalva@dominiodoemail.com.br</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "       nome  id_aluno                 dominio                         email\n",
              "114  ARTHUR       320  @dominiodoemail.com.br  arthur@dominiodoemail.com.br\n",
              "57   DAIANE       303     @servicodoemail.com     daiane@servicodoemail.com\n",
              "85   HELENA       221  @dominiodoemail.com.br  helena@dominiodoemail.com.br\n",
              "183   IRACI         8  @dominiodoemail.com.br   iraci@dominiodoemail.com.br\n",
              "189   DALVA       137  @dominiodoemail.com.br   dalva@dominiodoemail.com.br"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 15
        }
      ]
    },
    {
      "metadata": {
        "id": "rWFG7JlcAEwc",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "# Criando a tabela Cursos"
      ]
    },
    {
      "metadata": {
        "id": "6j05EJoi_yXg",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 85
        },
        "outputId": "a5a83708-c3a7-42b7-8c3d-193b6404feee"
      },
      "cell_type": "code",
      "source": [
        "!pip3 install html5lib\n",
        "!pip3 install lxml"
      ],
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: html5lib in /usr/local/lib/python3.6/dist-packages (1.0.1)\n",
            "Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.6/dist-packages (from html5lib) (1.11.0)\n",
            "Requirement already satisfied: webencodings in /usr/local/lib/python3.6/dist-packages (from html5lib) (0.5.1)\n",
            "Requirement already satisfied: lxml in /usr/local/lib/python3.6/dist-packages (4.2.5)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "V3QDmWgKAl_O",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "import html5lib"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "enuJcHMuAqER",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 370
        },
        "outputId": "cbf41552-3019-4eaf-d84c-a10c0f949065"
      },
      "cell_type": "code",
      "source": [
        "url = 'http://tabela-cursos.herokuapp.com/index.html'\n",
        "cursos = pd.read_html(url)\n",
        "cursos"
      ],
      "execution_count": 20,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[                          Nome do curso\n",
              " 0                 Lógica de programação\n",
              " 1                         Java para Web\n",
              " 2                           C# para Web\n",
              " 3                         Ruby on Rails\n",
              " 4                      Cursos de Python\n",
              " 5                         PHP com MySql\n",
              " 6                         .NET para web\n",
              " 7            Novas integrações com Java\n",
              " 8                          TDD com Java\n",
              " 9                   Código limpo com C#\n",
              " 10  Preparatório para certificação Java\n",
              " 11                      Hardware básico\n",
              " 12                Persistência com .NET\n",
              " 13                  Desenvolvendo jogos\n",
              " 14                     Análise de dados\n",
              " 15                   Estatística básica\n",
              " 16                  Internet das coisas\n",
              " 17                Programação funcional\n",
              " 18                Boas práticas em Java\n",
              " 19          Orientação objetos com Java]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 20
        }
      ]
    },
    {
      "metadata": {
        "id": "yF95u6gjAz9x",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "6d312ca8-c6a6-4523-b6ba-0cf48bcb32ae"
      },
      "cell_type": "code",
      "source": [
        "type(cursos)"
      ],
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "list"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 21
        }
      ]
    },
    {
      "metadata": {
        "id": "U9OaBLe1BCnD",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "cursos = cursos[0]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "Lgs_KUwFBF8G",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "cae0d7c6-0f80-4363-ae05-b5726ccc807b"
      },
      "cell_type": "code",
      "source": [
        "type(cursos)"
      ],
      "execution_count": 24,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "pandas.core.frame.DataFrame"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 24
        }
      ]
    },
    {
      "metadata": {
        "id": "mX4W0KOpBKh1",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 195
        },
        "outputId": "f12dacb4-fa55-447a-ed2d-ae68f28e0dac"
      },
      "cell_type": "code",
      "source": [
        "cursos.head()"
      ],
      "execution_count": 28,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Nome do curso</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Lógica de programação</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Java para Web</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>C# para Web</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Ruby on Rails</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Cursos de Python</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "           Nome do curso\n",
              "0  Lógica de programação\n",
              "1          Java para Web\n",
              "2            C# para Web\n",
              "3          Ruby on Rails\n",
              "4       Cursos de Python"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 28
        }
      ]
    },
    {
      "metadata": {
        "id": "enq_vtyNCpps",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        "# Alterando o index de cursos"
      ]
    },
    {
      "metadata": {
        "id": "_koRYbS7BM7d",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "cursos = cursos.rename(columns={'Nome do curso' : 'nome_do_curso'})"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "sFQLU45gC3m0",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 106
        },
        "outputId": "ce66afbb-fd64-450f-97ce-bb2c23259cce"
      },
      "cell_type": "code",
      "source": [
        "cursos.head(2)"
      ],
      "execution_count": 31,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>nome_do_curso</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Lógica de programação</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Java para Web</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "           nome_do_curso\n",
              "0  Lógica de programação\n",
              "1          Java para Web"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 31
        }
      ]
    },
    {
      "metadata": {
        "id": "1yB5jnREC8rU",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "cursos['id'] = cursos.index + 1"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "wEVs9MzuDGzE",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 204
        },
        "outputId": "27d63e94-6580-4065-a0a9-97991acbec84"
      },
      "cell_type": "code",
      "source": [
        "cursos.head()"
      ],
      "execution_count": 34,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>nome_do_curso</th>\n",
              "      <th>id</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Lógica de programação</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Java para Web</td>\n",
              "      <td>2</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>C# para Web</td>\n",
              "      <td>3</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Ruby on Rails</td>\n",
              "      <td>4</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Cursos de Python</td>\n",
              "      <td>5</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "           nome_do_curso  id\n",
              "0  Lógica de programação   1\n",
              "1          Java para Web   2\n",
              "2            C# para Web   3\n",
              "3          Ruby on Rails   4\n",
              "4       Cursos de Python   5"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 34
        }
      ]
    },
    {
      "metadata": {
        "id": "0kJXsjEwDJcM",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        "cursos = cursos.set_index('id')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "2C2eR9HwDVvg",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 235
        },
        "outputId": "abf6ec03-d326-4921-c56b-b9fbfe11fb05"
      },
      "cell_type": "code",
      "source": [
        "cursos.head(5)"
      ],
      "execution_count": 38,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>nome_do_curso</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>id</th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Lógica de programação</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Java para Web</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>C# para Web</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Ruby on Rails</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>Cursos de Python</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "            nome_do_curso\n",
              "id                       \n",
              "1   Lógica de programação\n",
              "2           Java para Web\n",
              "3             C# para Web\n",
              "4           Ruby on Rails\n",
              "5        Cursos de Python"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 38
        }
      ]
    },
    {
      "metadata": {
        "id": "N_kpGlBJDXNj",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}