Files
qu-en/notebooks/make_basedata.ipynb
Jalmari Tuominen 60e6abc831 Initial
2025-11-27 08:59:35 +02:00

136 lines
3.2 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "import-libs",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from pyprojroot import here\n",
"\n",
"from utils import country_abbrev"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "load-data",
"metadata": {},
"outputs": [],
"source": [
"# Read the raw measurement table. The location is built from here() (pyprojroot)\n",
"# so it is anchored at the project root rather than the current directory.\n",
"raw_csv_path = here() / 'data/raw/experimental_data.csv'\n",
"data = pd.read_csv(raw_csv_path, encoding='utf-8')\n",
"print(f\"Loaded {data.shape[0]} measurements\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "filter-complete",
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows whose 'Complete?' value is exactly 'Complete'\n",
"is_complete = data['Complete?'] == 'Complete'\n",
"data = data.loc[is_complete]\n",
"print(f\"Complete measurements: {data.shape[0]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "filter-test",
"metadata": {},
"outputs": [],
"source": [
"# Drop the test entries, i.e. every row recorded under the Study ID below\n",
"TEST_STUDY_ID = 26\n",
"is_test_entry = data['Study ID'] == TEST_STUDY_ID\n",
"data = data.loc[~is_test_entry]\n",
"print(f\"After removing test entries: {data.shape[0]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "clean-names",
"metadata": {},
"outputs": [],
"source": [
"# Strip leading/trailing whitespace from the free-text columns\n",
"for text_col in ('Country', 'Institution', 'Lab Name'):\n",
"    data[text_col] = data[text_col].str.strip()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "add-country-code",
"metadata": {},
"outputs": [],
"source": [
"# Add country codes by mapping each country name through country_abbrev\n",
"# (project helper from utils, imported in the imports cell at the top).\n",
"# NOTE(review): country_abbrev is assumed to be a mapping or a callable - confirm.\n",
"data['Country Code'] = data['Country'].map(country_abbrev)\n",
"\n",
"# Series.map leaves NaN where a name has no entry in a mapping, so report any\n",
"# country that ended up without a code instead of letting it pass silently.\n",
"unmapped = data.loc[data['Country Code'].isna() & data['Country'].notna(), 'Country']\n",
"if not unmapped.empty:\n",
"    print(f\"Warning: no country code for: {sorted(map(str, unmapped.unique()))}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "calculate-metrics",
"metadata": {},
"outputs": [],
"source": [
"# Calculate derived metrics\n",
"BELL_CLASSICAL_LIMIT = 2.0  # subtracted from S; presumably the classical CHSH limit - confirm\n",
"BELL_S_ERROR = 0.1  # simplified error: the same fixed uncertainty is used for every row\n",
"\n",
"# 'CAR' = coincidence rate / sqrt(singles A * singles B).\n",
"# NOTE(review): confirm this ratio is the intended definition of CAR.\n",
"singles_product = data['Singles Rate A (Hz)'] * data['Singles Rate B (Hz)']\n",
"data['CAR'] = data['Coincidence Rate (Hz)'] / np.sqrt(singles_product)\n",
"\n",
"# Excess of S over the limit, expressed in units of the fixed error\n",
"bell_excess = data['Bell Parameter (S)'] - BELL_CLASSICAL_LIMIT\n",
"data['Bell Violation Sigma'] = bell_excess / BELL_S_ERROR"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "categorize",
"metadata": {},
"outputs": [],
"source": [
"# Categorize fidelity\n",
"# (inclusive lower bound, label), checked from the highest band downwards\n",
"FIDELITY_BANDS = (\n",
"    (0.98, 'Excellent'),\n",
"    (0.95, 'Good'),\n",
"    (0.90, 'Acceptable'),\n",
")\n",
"\n",
"\n",
"def fidelity_category(f):\n",
"    \"\"\"Return the quality label for a single fidelity value.\n",
"\n",
"    Returns 'Excellent' for f >= 0.98, 'Good' for f >= 0.95,\n",
"    'Acceptable' for f >= 0.90 and 'Poor' for anything lower.\n",
"    A missing value (NaN/None) returns NaN: every comparison with NaN\n",
"    is False, so without this guard it would be labelled 'Poor'.\n",
"    \"\"\"\n",
"    if pd.isna(f):\n",
"        return np.nan\n",
"    for lower_bound, label in FIDELITY_BANDS:\n",
"        if f >= lower_bound:\n",
"            return label\n",
"    return 'Poor'\n",
"\n",
"\n",
"data['Fidelity Category'] = data['Entanglement Fidelity'].apply(fidelity_category)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "save-data",
"metadata": {},
"outputs": [],
"source": [
"# Save processed data as both CSV and pickle under data/base\n",
"base_dir = here() / 'data/base'\n",
"# to_csv/to_pickle do not create parent folders, so make sure the target exists\n",
"# (e.g. on a fresh checkout where data/base has never been written).\n",
"base_dir.mkdir(parents=True, exist_ok=True)\n",
"data.to_csv(base_dir / 'data.csv', index=False)\n",
"data.to_pickle(base_dir / 'data.pkl')\n",
"print(f\"Saved {data.shape[0]} processed measurements\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}