136 lines
3.2 KiB
Plaintext
136 lines
3.2 KiB
Plaintext
{
|
|
"cells": [
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "import-libs",
 "metadata": {},
 "outputs": [],
 "source": [
  "import numpy as np\n",
  "import pandas as pd\n",
  "from pyprojroot import here\n",
  "\n",
  "from utils import country_abbrev"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "load-data",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Read the raw measurements CSV located under here() and report the row count.\n",
  "raw_csv_path = here() / 'data/raw/experimental_data.csv'\n",
  "data = pd.read_csv(raw_csv_path, encoding='utf-8')\n",
  "print(f\"Loaded {len(data)} measurements\")"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "filter-complete",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Keep only the rows whose 'Complete?' value is exactly 'Complete'.\n",
  "is_complete = data['Complete?'].eq('Complete')\n",
  "data = data[is_complete]\n",
  "print(f\"Complete measurements: {len(data)}\")"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "filter-test",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Drop the test entries, i.e. every row recorded under Study ID 26.\n",
  "is_real_entry = data['Study ID'].ne(26)\n",
  "data = data[is_real_entry]\n",
  "print(f\"After removing test entries: {len(data)}\")"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "clean-names",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Clean up text fields: strip leading/trailing whitespace.\n",
  "# `assign` returns a new DataFrame instead of writing columns into the\n",
  "# boolean-mask-filtered frame, which avoids pandas' SettingWithCopyWarning.\n",
  "text_columns = ['Country', 'Institution', 'Lab Name']\n",
  "data = data.assign(**{col: data[col].str.strip() for col in text_columns})"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "add-country-code",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Add country codes (country_abbrev is imported in the imports cell).\n",
  "# `assign` returns a new DataFrame rather than writing into a filtered slice.\n",
  "data = data.assign(**{'Country Code': data['Country'].map(country_abbrev)})\n",
  "\n",
  "# Surface countries that ended up without a code instead of leaving silent gaps.\n",
  "missing_code = data['Country Code'].isna() & data['Country'].notna()\n",
  "if missing_code.any():\n",
  "    print(f\"No country code for: {data.loc[missing_code, 'Country'].unique().tolist()}\")"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "calculate-metrics",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Calculate derived metrics\n",
  "# 'CAR' = coincidence rate divided by the geometric mean of the two singles rates.\n",
  "# NOTE(review): confirm this is the intended CAR definition.\n",
  "singles_product = data['Singles Rate A (Hz)'] * data['Singles Rate B (Hz)']\n",
  "data['CAR'] = data['Coincidence Rate (Hz)'] / np.sqrt(singles_product)\n",
  "\n",
  "# Excess of the Bell parameter over 2.0, divided by a fixed 0.1 (simplified error).\n",
  "bell_excess = data['Bell Parameter (S)'] - 2.0\n",
  "data['Bell Violation Sigma'] = bell_excess / 0.1"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "categorize",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Categorize fidelity\n",
  "def fidelity_category(f):\n",
  "    \"\"\"Map a fidelity value to a quality label.\n",
  "\n",
  "    Returns 'Excellent' for f >= 0.98, 'Good' for f >= 0.95,\n",
  "    'Acceptable' for f >= 0.90 and 'Poor' below that.\n",
  "    A missing value is returned as NaN: it would fail every comparison\n",
  "    and otherwise be mislabelled 'Poor'.\n",
  "    \"\"\"\n",
  "    if pd.isna(f):\n",
  "        return np.nan\n",
  "    if f >= 0.98:\n",
  "        return 'Excellent'\n",
  "    if f >= 0.95:\n",
  "        return 'Good'\n",
  "    if f >= 0.90:\n",
  "        return 'Acceptable'\n",
  "    return 'Poor'\n",
  "\n",
  "data['Fidelity Category'] = data['Entanglement Fidelity'].apply(fidelity_category)"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "save-data",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Save processed data\n",
  "output_dir = here() / 'data/base'\n",
  "# Create the target folder first so the writes do not fail on a fresh checkout.\n",
  "output_dir.mkdir(parents=True, exist_ok=True)\n",
  "data.to_csv(output_dir / 'data.csv', index=False)\n",
  "data.to_pickle(output_dir / 'data.pkl')\n",
  "print(f\"Saved {data.shape[0]} processed measurements\")"
 ]
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"name": "python",
|
|
"version": "3.9.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|