exciteproject · azeddinebouabdallah · Jun 3, 2022 · Jun 3, 2022
diff --git a/Evaluation/Ours/Datasets/Dataset/CrossValidation/all.txt b/Evaluation/Ours/Datasets/Dataset/CrossValidation/all.txt
diff --git a/Evaluation/Ours/Datasets/Dataset/CrossValidation/allGroundTruth.txt b/Evaluation/Ours/Datasets/Dataset/CrossValidation/allGroundTruth.txt
diff --git a/Evaluation/Ours/Datasets/Dataset/CrossValidation/allText.txt b/Evaluation/Ours/Datasets/Dataset/CrossValidation/allText.txt
diff --git a/Evaluation/Ours/Datasets/Dataset/CrossValidation/allTextOld.txt b/Evaluation/Ours/Datasets/Dataset/CrossValidation/allTextOld.txt
diff --git a/Evaluation/Ours/Datasets/Dataset/CrossValidation/cleanGroundTruth.ipynb b/Evaluation/Ours/Datasets/Dataset/CrossValidation/cleanGroundTruth.ipynb
@@ -0,0 +1,191 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Concatenate each .txt file in TrainingInput into groundTruthAll.txt, sorted by\n",
+    "# name because os.scandir yields entries in arbitrary order.\n",
+    "entries = sorted(os.scandir(\"../../New/TrainingInput\"), key=lambda e: e.name)\n",
+    "with open(\"./groundTruthAll.txt\", \"w\") as out:\n",
+    "    for entry in entries:\n",
+    "        if entry.name.endswith(\".txt\"):\n",
+    "            with open(entry.path, \"r\") as src:\n",
+    "                out.write(src.read() + \"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Drop blank lines (empty or a single space) from test.txt, rewriting it in place.\n",
+    "with open(\"./test.txt\", \"r\") as src:\n",
+    "    kept = [\n",
+    "        row\n",
+    "        for row in src.read().splitlines()\n",
+    "        if row not in (\"\", \" \")\n",
+    "    ]\n",
+    "\n",
+    "# No trailing newline is written after the last kept line.\n",
+    "with open('test.txt', 'w') as dst:\n",
+    "    dst.write(\"\\n\".join(kept))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build ../TrainingInput/data_<fold>.txt for each of the 10 folds: every row\n",
+    "# of ./training/training_<fold>.txt is an integer index into the lines of\n",
+    "# ./allGroundTruth.txt, and the selected lines are written in that order.\n",
+    "\n",
+    "# Loop-invariant: read the ground-truth lines once instead of once per fold.\n",
+    "with open(\"./allGroundTruth.txt\", \"r\") as f:\n",
+    "    lines = f.read().split('\\n')\n",
+    "\n",
+    "for fold in range(10):\n",
+    "    with open(\"./training/training_\"+str(fold)+\".txt\") as f1:\n",
+    "        # Skip blank rows (e.g. from a trailing newline); int('') would raise.\n",
+    "        indexes = [int(l) for l in f1.read().split('\\n') if l.strip()]\n",
+    "\n",
+    "    refLines = [lines[i] for i in indexes]\n",
+    "\n",
+    "    # Lines are joined without a trailing newline after the last one.\n",
+    "    with open(\"../TrainingInput/data_\"+str(fold)+\".txt\", \"w\") as f2:\n",
+    "        f2.write(\"\\n\".join(refLines))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# For each of the 10 folds, select the lines of ./allGroundTruth.txt whose\n",
+    "# integer indexes are listed (one per row) in ./testing/testing_<fold>.txt and\n",
+    "# write them to both ../TrainingInput/data_<fold>.txt and\n",
+    "# ../GroundTruth/groundTruth_<fold>.txt.\n",
+    "# NOTE(review): this writes the same ../TrainingInput/data_<fold>.txt files as\n",
+    "# the training-fold cell, so whichever cell runs last wins; confirm intent.\n",
+    "\n",
+    "# Loop-invariant: read the ground-truth lines once instead of once per fold.\n",
+    "with open(\"./allGroundTruth.txt\", \"r\") as f:\n",
+    "    lines = f.read().split('\\n')\n",
+    "\n",
+    "for fold in range(10):\n",
+    "    with open(\"./testing/testing_\"+str(fold)+\".txt\") as f1:\n",
+    "        # Skip blank rows (e.g. from a trailing newline); int('') would raise.\n",
+    "        indexes = [int(l) for l in f1.read().split('\\n') if l.strip()]\n",
+    "\n",
+    "    selected = \"\\n\".join(lines[i] for i in indexes)\n",
+    "\n",
+    "    with open(\"../TrainingInput/data_\"+str(fold)+\".txt\", \"w\") as f2:\n",
+    "        f2.write(selected)\n",
+    "    with open(\"../GroundTruth/groundTruth_\"+str(fold)+\".txt\", \"w\") as f3:\n",
+    "        f3.write(selected)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Remove empty lines from each of the ten ../RefLines/refLines_<n>.txt files,\n",
+    "# rewriting every file in place.\n",
+    "for n in range(10):\n",
+    "    target = \"../RefLines/refLines_\"+str(n)+\".txt\"\n",
+    "\n",
+    "    with open(target) as src:\n",
+    "        rows = src.read().split('\\n')\n",
+    "    kept = [row for row in rows if row != '']\n",
+    "\n",
+    "    # Joined without a trailing newline after the last kept row.\n",
+    "    with open(target, \"w\") as dst:\n",
+    "        dst.write(\"\\n\".join(kept))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Write ../RefLines/refLines_<i>.txt for each of the 10 folds: the lines of\n",
+    "# ./allText.txt selected by the integer indexes in ./testing/testing_<i>.txt.\n",
+    "\n",
+    "# Loop-invariant: read the text lines once instead of once per fold.\n",
+    "with open(\"./allText.txt\", \"r\") as textFile:\n",
+    "    content = textFile.read().split(\"\\n\")\n",
+    "\n",
+    "for i in range(10):\n",
+    "    with open(\"./testing/testing_\"+str(i)+\".txt\") as f:\n",
+    "        # Skip blank rows (e.g. from a trailing newline); int('') would raise.\n",
+    "        indexes = [int(row) for row in f.read().split('\\n') if row.strip()]\n",
+    "    with open(\"../RefLines/refLines_\"+str(i)+\".txt\", \"w\") as r:\n",
+    "        r.write(\"\\n\".join(content[index] for index in indexes))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Write ../GroundTruth/groundTruth_<k>.txt for each of the 10 folds: the lines\n",
+    "# of ./allGroundTruth.txt selected by the indexes in ./testing/testing_<k>.txt.\n",
+    "with open(\"./allGroundTruth.txt\") as f:\n",
+    "    file = f.read().split('\\n')\n",
+    "\n",
+    "for k in range(10):\n",
+    "    with open(\"./testing/testing_\"+str(k)+\".txt\", \"r\") as f1:\n",
+    "        # Skip blank rows (e.g. from a trailing newline); int('') would raise.\n",
+    "        indexes = [int(i) for i in f1.read().split('\\n') if i.strip()]\n",
+    "\n",
+    "    # Selected lines are joined without a trailing newline.\n",
+    "    with open('../GroundTruth/groundTruth_'+str(k)+\".txt\", \"w\") as f2:\n",
+    "        f2.write(\"\\n\".join(file[i] for i in indexes))\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "3a865692b08f5326a94ffda11e448d3aaa26e4dbe54761a90b9afa993c954b42"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.7.4 64-bit ('base': conda)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}