Initial
This commit is contained in:
135
notebooks/make_basedata.ipynb
Normal file
135
notebooks/make_basedata.ipynb
Normal file
@@ -0,0 +1,135 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "import-libs",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
"import numpy as np\n",
"import pandas as pd\n",
"from pyprojroot import here\n",
"\n",
"from utils import country_abbrev"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "load-data",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
"# Read the raw measurements CSV, located relative to here()\n",
"raw_csv_path = here() / 'data/raw/experimental_data.csv'\n",
"data = pd.read_csv(raw_csv_path, encoding='utf-8')\n",
"print(f\"Loaded {data.shape[0]} measurements\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "filter-complete",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
"# Keep only the rows whose 'Complete?' value is exactly 'Complete'\n",
"is_complete = data['Complete?'] == 'Complete'\n",
"data = data.loc[is_complete]\n",
"print(f\"Complete measurements: {data.shape[0]}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "filter-test",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
"# Remove test entries\n",
"TEST_STUDY_ID = 26  # Study ID that this notebook treats as test data\n",
"\n",
"# .copy() hands the following cells an independent frame to add columns to, so pandas\n",
"# does not emit SettingWithCopyWarning when they assign into the filtered result.\n",
"# NOTE(review): the comparison assumes 'Study ID' was parsed as a number - confirm the dtype.\n",
"data = data[data['Study ID'] != TEST_STUDY_ID].copy()\n",
"print(f\"After removing test entries: {data.shape[0]}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "clean-names",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
"# Trim leading/trailing whitespace from the free-text columns\n",
"for text_col in ('Country', 'Institution', 'Lab Name'):\n",
"    data[text_col] = data[text_col].str.strip()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "add-country-code",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
"# Add country codes (country_abbrev is imported with the other imports in the first cell)\n",
"data['Country Code'] = data['Country'].map(country_abbrev)\n",
"\n",
"# Rows that have a country but ended up without a code would otherwise pass through\n",
"# silently as NaN; list them so the lookup can be extended.\n",
"unmapped = data.loc[data['Country'].notna() & data['Country Code'].isna(), 'Country'].unique()\n",
"if len(unmapped):\n",
"    print(f\"WARNING: no country code for: {sorted(unmapped)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "calculate-metrics",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
"# Calculate derived metrics\n",
"BELL_CLASSICAL_BOUND = 2.0  # value subtracted from S before scaling (CHSH local-realist limit)\n",
"BELL_S_ASSUMED_SIGMA = 0.1  # Simplified error: one fixed uncertainty applied to every row\n",
"\n",
"# NOTE(review): coincidences / sqrt(singles A * singles B) is the usual form of a symmetric\n",
"# heralding efficiency; a coincidence-to-accidental ratio normally divides by an accidental\n",
"# rate instead. Confirm that 'CAR' is the intended name/formula.\n",
"singles_product = data['Singles Rate A (Hz)'] * data['Singles Rate B (Hz)']\n",
"# A non-positive singles product has no defined ratio: keep those rows as NaN instead of inf\n",
"data['CAR'] = data['Coincidence Rate (Hz)'] / np.sqrt(singles_product.where(singles_product > 0))\n",
"data['Bell Violation Sigma'] = (data['Bell Parameter (S)'] - BELL_CLASSICAL_BOUND) / BELL_S_ASSUMED_SIGMA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "categorize",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Categorize fidelity\n",
|
"def fidelity_category(f):\n",
"    \"\"\"Bucket an entanglement fidelity value into a quality label.\n",
"\n",
"    Args:\n",
"        f: Fidelity as a number; may be missing (None / NaN / pd.NA).\n",
"\n",
"    Returns:\n",
"        'Excellent' (f >= 0.98), 'Good' (f >= 0.95), 'Acceptable' (f >= 0.90),\n",
"        'Poor' (anything lower), or None when f is missing.\n",
"    \"\"\"\n",
"    # A missing value fails every >= comparison, so without this guard NaN would fall\n",
"    # through to 'Poor' and be indistinguishable from a genuinely low measurement.\n",
"    if pd.isna(f):\n",
"        return None\n",
"    if f >= 0.98:\n",
"        return 'Excellent'\n",
"    if f >= 0.95:\n",
"        return 'Good'\n",
"    if f >= 0.90:\n",
"        return 'Acceptable'\n",
"    return 'Poor'\n",
|
||||
"\n",
|
"# Label every row by passing its fidelity through fidelity_category\n",
"fidelity_values = data['Entanglement Fidelity']\n",
"data['Fidelity Category'] = fidelity_values.apply(fidelity_category)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "save-data",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
"# Save processed data (CSV and pickle of the same frame)\n",
"base_dir = here() / 'data/base'\n",
"# to_csv / to_pickle do not create missing folders, so make sure the target exists first\n",
"base_dir.mkdir(parents=True, exist_ok=True)\n",
"data.to_csv(base_dir / 'data.csv', index=False)\n",
"data.to_pickle(base_dir / 'data.pkl')\n",
"print(f\"Saved {data.shape[0]} processed measurements\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.9.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user