diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 00000000..a814c4c4 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,44 @@ +name: Build Jekyll site +on: + workflow_dispatch: + push: + branches: ["master"] +permissions: + contents: read + pages: write + id-token: write +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Ruby + uses: ruby/setup-ruby@v1.285.0 + with: + ruby-version: "3.1" # Not needed with a .ruby-version file + bundler-cache: true # runs 'bundle install' and caches installed gems automatically + cache-version: 0 # Increment this number if you need to re-download cached gems + - name: Setup Pages + # The id is required so the build step below can read steps.pages.outputs.base_path + id: pages + uses: actions/configure-pages@v5 + - name: Build with Jekyll + # Outputs to the './_site' directory by default + run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}" + env: + JEKYLL_ENV: production + - name: Upload artifact + # Automatically uploads an artifact from the './_site' directory by default + uses: actions/upload-pages-artifact@v3 + + deploy: + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml new file mode 100644 index 00000000..3f975f8b --- /dev/null +++ b/.github/workflows/stale.yaml @@ -0,0 +1,19 @@ +name: 'Close stale issues and PRs' +on: + schedule: + - cron: '30 1 * * *' + +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v10 + with: + stale-issue-message: 'This issue was marked as stale because it has been open 15 days with no activity. Remove the stale label or comment on this issue or this issue will be closed in 5 days.' + stale-pr-message: 'This PR was marked as stale because it has been open 15 days with no activity. 
Remove the stale label or comment on this pull request or this pull request will be closed in 5 days.' + close-issue-message: 'This issue was closed because it has been stale for 5 days with no activity.' + close-pr-message: 'This PR was closed because it has been stale for 5 days with no activity.' + days-before-issue-stale: 15 + days-before-pr-stale: 15 + days-before-issue-close: 5 + days-before-pr-close: 5 \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 00000000..d75bbcf3 --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,30 @@ +name: Test Jekyll site + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +permissions: + # Least privilege: this workflow only builds and link-checks the site, it never deploys + contents: read +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Ruby + uses: ruby/setup-ruby@v1.285.0 + with: + ruby-version: "3.1" # Not needed with a .ruby-version file + bundler-cache: true # runs 'bundle install' and caches installed gems automatically + cache-version: 0 # Increment this number if you need to re-download cached gems + - name: Build with Jekyll + # Outputs to the './_site' directory by default + run: bundle exec jekyll build + env: + JEKYLL_ENV: production + - name: Run htmlproofer + run: bundle exec htmlproofer --log-level debug ./_site diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..58c57d63 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 # Use the latest stable tag + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - repo: https://github.com/DavidAnson/markdownlint-cli2 + rev: v0.20.0 + hooks: + - id: markdownlint-cli2 \ No newline at end of file diff --git a/.travis-scripts/html-proofer b/.travis-scripts/html-proofer deleted file mode 100755 
index 56ff5672..00000000 --- a/.travis-scripts/html-proofer +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -usage () { - echo "usage: $(basename $0) [--debug]" - exit 2 -} - -debug_option= - -if [ "$1" == "--debug" ] -then - debug_option="--log-level debug" -elif [ -n "$1" ] -then - echo "Unsupported option ($1)" - usage -fi - -bundle exec jekyll build -bundle exec htmlproofer ${url_ignore_option} ${debug_option} --check-html ./_site -status=$? -if [ ${status} -ne 0 -a -z "${debug_option}" ] -then - echo "" - echo "htmlproofer failed to validate site contents: use --debug for more information" -fi - -exit ${status} diff --git a/.travis-scripts/url-ignore b/.travis-scripts/url-ignore deleted file mode 100644 index a55d0915..00000000 --- a/.travis-scripts/url-ignore +++ /dev/null @@ -1,11 +0,0 @@ -# Do not use this file to suppress checks against URLs any more. -# Instead please add the attribute tag "data-proofer-ignore" to your link, -# see -# https://github.com/gjtorikian/html-proofer#ignoring-content -# -# To use this technique with a markdown URL, you can use the -# kramdown extension that adds tags to URLs, e.g. -# [link text](http://valid.but.unferifiable.com/){:data-proofer-ignore=""} -# -# N.B. Jekyll's kramdown generator changed to ignore plain attributes attached -# to links, but using an empty value works just fine. 
diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 2c011cee..00000000 --- a/.travis.yml +++ /dev/null @@ -1,20 +0,0 @@ -sudo: required - -language: ruby - -services: - - docker - -before_install: - - docker pull hepsoftwarefoundation/hsf-jekyll - - # Skip install as the container does all the work -install: /bin/true - -script: - - docker run --volume $(pwd):/srv/jekyll --interactive --tty --user $(id -u) --env HOME=/tmp hepsoftwarefoundation/hsf-jekyll /bin/sh -c /srv/jekyll/.travis-scripts/html-proofer - -# branch whitelist, only for GitHub Pages -branches: - only: - - master diff --git a/Gemfile b/Gemfile index e7350e42..c8c63530 100644 --- a/Gemfile +++ b/Gemfile @@ -3,9 +3,3 @@ source "https://rubygems.org" gem 'jekyll-feed' gem "github-pages", ">= 150" gem "html-proofer" - -# Suggested by Jekyll -gem 'wdm', '>= 0.1.0' if Gem.win_platform? - -# Required on Windows for recent versions (>= 3.6) of Jekyll -gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw] diff --git a/README.md b/README.md index 7c7f3145..33a2e458 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,15 @@ # ML4SCI.github.io + Machine Learning for Science github site -* Live at https://ml4sci.org +- Live at https://ml4sci.org ## About ML4SCI -Machine Learning for Science (ML4Sci) is an open-source organization that brings together modern machine learning techniques and applies them to cutting edge problems in Science, Technology, Engineering, and Math (STEM). + +Machine Learning for Science (ML4Sci) is an open-source organization that brings together modern machine learning techniques and applies them to cutting edge problems in Science, Technology, Engineering, and Math (STEM). ## ML4SCI in GSoC 2022 + The ML4Sci open source organization is participating in the [2022 Google Summer of Code](https://summerofcode.withgoogle.com/). 
If you are a student interested in our [projects](https://ml4sci.org/activities/gsoc.html) please check our [ideas page](https://ml4sci.org/gsoc/2022/summary.html). ML4Sci is an umbrella organization that welcomes other projects and organizations related to machine-learning for science. Please contact the admins at [ml4-sci@cern.ch](ml4-sci@cern.ch) if you are interested in participating as a project. ![GSOC](https://ml4sci.org/images/GSoC/GSoC-icon-192.png) diff --git a/_activities/gsoc2026.md b/_activities/gsoc2026.md new file mode 100644 index 00000000..b3b3735a --- /dev/null +++ b/_activities/gsoc2026.md @@ -0,0 +1,55 @@ +--- +title: "Google Summer of Code 2026" +author: "Emanuele Usai" +layout: default +--- + +# ![ML4SCI](/images/CERN-HSF-GSoC-logo.png){:height="100px"} Google Summer of Code 2026 + +## For Students: The 2026 GSoC term has not yet started. Please check this website for more information on February 19th + +## Introduction + +In 2026 ML4SCI plans to participate in the program as a GSoC umbrella organization. +The ML4SCI organization plans to partner with the [Google Summer of Code](https://summerofcode.withgoogle.com) in 2026 to broaden student participation in machine learning projects over a wide variety of scientific fields. +ML4SCI participants will be mentored by scientists at top research universities and laboratories on research projects at the cutting edge of science. +Projects span a wide range of scientific domains, including physics, astronomy, planetary science, quantum information science and others. + +### For Students + +In 2026 GSoC students work with their mentors for 175 hrs to produce open-source codes that apply machine learning solutions to solve science problems. Projects span three evaluation periods that allow for students and mentors to collaborate on their project and evaluate student progress. Detailed rules for the GSOC program can be found on Google's [rules page](https://summerofcode.withgoogle.com/rules/). 
+Interested students should look at the ideas page and contact the mentors. Candidates will be asked to complete an evaluation test for each project they apply to, in order to demonstrate the skills needed for the respective projects. +In the next step, students will produce a proposal which will be evaluated for final student selection. + + + +Please see the [official GSoC Timeline](https://summerofcode.withgoogle.com/how-it-works/). + +## Projects in 2026 + +{% assign current_year = "2026" %} +{% include gsoc_project_list.ext year=current_year %} + +## Participating Organizations in 2026 + +{% include gsoc_organization_list.ext year=current_year %} + +## Summary + +[Full list of Proposal Ideas](/gsoc/2026/summary.html) + +[Full list of Mentors](/gsoc/2026/mentors.html) + +--- + +## Administrators + +[Prof. Sergei Gleyzer (University of Alabama)](https://sergeigleyzer.com/) + +[Prof. Emanuele Usai (University of Alabama)](https://emanueleusai.com) + +[Dr. Patrick Peplowski (JHUAPL)](https://civspace.jhuapl.edu/people/patrick-peplowski) + +## Contacts + +[_ML4SCI GSoC Admins_](mailto:ml4-sci@cern.ch) diff --git a/_gsocorgs/2026/IITDhanbad.md b/_gsocorgs/2026/IITDhanbad.md new file mode 100644 index 00000000..9dce2de5 --- /dev/null +++ b/_gsocorgs/2026/IITDhanbad.md @@ -0,0 +1,12 @@ +--- +title: "Indian Institute of Technology Dhanbad" +author: "Neeraj Anand" +layout: default +organization: IITDhanbad +logo: IIT_Dhanbad.png +description: | + + The [Indian Institute of Technology, Dhanbad](https://www.iitism.ac.in) is a public technical university located in Dhanbad, India. IIT Dhanbad is an Institute of National Importance, and is ranked among the premier engineering institutions of India. +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/README.md b/_gsocorgs/2026/README.md new file mode 100644 index 00000000..2c96a10b --- /dev/null +++ b/_gsocorgs/2026/README.md @@ -0,0 +1,14 @@ +# To add a new organization + +Proposals are attached to an organization (e.g. 
CERN, Fermilab…). If you are a new organization, you need to create an MD file describing your organization in this directory. This is a very simple file, containing only a (short, one paragraph) front matter section that defines the attributes of your organization. + +A proposal is attached to an organization by its `organization` attribute, which must match (case insensitive) the `organization` attribute defined in the organization's MD file. This attribute can be a single value or a list. For a list, use the following syntax in the front matter section: + +organization: + +- CERN +- Fermilab + +To create a new organization, copy `_gsocorgs/2026/cern.md`, create a file for your organization and edit its contents as appropriate. The file name should be in lower case matching the organization attribute. Each organization must have a logo in the `images` folder of this repository in png format, named `ORG-logo.png` upper case. For example for the organization CC-IN2P3, the organization file should be named `cc-in2p3.md` and the logo file should be `CC-IN2P3-logo.png`. + +Note that most organizations already have MD files in the previous year folders - they can be copied into the 2026 folder (instead of modifying `cern.md`) and changed to match the rules above. diff --git a/_gsocorgs/2026/alabama.md b/_gsocorgs/2026/alabama.md new file mode 100644 index 00000000..4f3c0b49 --- /dev/null +++ b/_gsocorgs/2026/alabama.md @@ -0,0 +1,12 @@ +--- +title: "University of Alabama" +author: "Omar Zapata" +layout: default +organization: Alabama +logo: UA-logo.jpg +description: | + The [University of Alabama](https://www.ua.edu/) is a public research university in Tuscaloosa, Alabama. 
Established in 1820, the University of Alabama is the oldest and largest of the public universities in Alabama as well as the flagship of the University of Alabama System + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/aub.md b/_gsocorgs/2026/aub.md new file mode 100644 index 00000000..cf8f4561 --- /dev/null +++ b/_gsocorgs/2026/aub.md @@ -0,0 +1,13 @@ +--- +title: "American University of Beirut" +author: "Emanuele Usai" +layout: default +organization: AUB +logo: AUB_Seal_3.png +description: | + The American University of Beirut (AUB) is a private, non-sectarian, and independent university chartered in New York with its campus in Beirut, Lebanon + + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/bits pilani goa.md b/_gsocorgs/2026/bits pilani goa.md new file mode 100644 index 00000000..4309836b --- /dev/null +++ b/_gsocorgs/2026/bits pilani goa.md @@ -0,0 +1,13 @@ +--- +title: "BITS Pilani Goa" +author: "Emanuele Usai" +layout: default +organization: BITS Pilani Goa +logo: BITS_Pilani-Logo.svg.png +description: | + Birla Institute of Technology and Science, Pilani – Goa Campus is a private deemed university campus located in Goa, India. + + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/bits pilani hyderabad.md b/_gsocorgs/2026/bits pilani hyderabad.md new file mode 100644 index 00000000..8706b677 --- /dev/null +++ b/_gsocorgs/2026/bits pilani hyderabad.md @@ -0,0 +1,13 @@ +--- +title: "BITS Pilani Hyderabad" +author: "Emanuele Usai" +layout: default +organization: BITS Pilani Hyderabad +logo: BITS_Pilani-Logo.svg.png +description: | + Birla Institute of Technology and Science, Pilani – Hyderabad Campus is a private deemed university campus located in Hyderabad, India. 
+ + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/brown.md b/_gsocorgs/2026/brown.md new file mode 100644 index 00000000..c57fec04 --- /dev/null +++ b/_gsocorgs/2026/brown.md @@ -0,0 +1,11 @@ +--- +title: "Brown University" +author: "Omar Zapata" +layout: default +organization: Brown +logo: BROWN-logo.png +description: | + Brown University is a private Ivy League research university in Providence, Rhode Island, founded in 1764. +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/catalonia.md b/_gsocorgs/2026/catalonia.md new file mode 100644 index 00000000..715760e6 --- /dev/null +++ b/_gsocorgs/2026/catalonia.md @@ -0,0 +1,12 @@ +--- +title: "Polytechnic University of Catalonia" +author: "Omar Zapata" +layout: default +organization: Catalonia +logo: Logo_UPC.svg.png +description: | + The Technical University of Catalonia, currently referred to as BarcelonaTech, is the largest engineering university in Catalonia, Spain. It also offers programs in other disciplines such as mathematics and architecture. + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/cerium.md b/_gsocorgs/2026/cerium.md new file mode 100644 index 00000000..4f14488f --- /dev/null +++ b/_gsocorgs/2026/cerium.md @@ -0,0 +1,13 @@ +--- +title: "Cerium Labs" +author: "Eric Reinhardt" +layout: default +organization: Cerium +logo: Cerium-Labs.png +description: | + Cerium Laboratories, LLC is a world-class analytical laboratory located in Austin, Texas. + + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/cern.md b/_gsocorgs/2026/cern.md new file mode 100644 index 00000000..b2f3a59a --- /dev/null +++ b/_gsocorgs/2026/cern.md @@ -0,0 +1,11 @@ +--- +title: "CERN" +author: "Benedikt Hegner" +layout: default +organization: CERN +logo: CERN-logo.jpg +description: | + At [CERN](https://home.cern), the European Organization for Nuclear Research, physicists and engineers are probing the fundamental structure of the universe. 
They use the world's largest and most complex scientific instruments to study the basic constituents of matter – the fundamental particles. +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/cmu.md b/_gsocorgs/2026/cmu.md new file mode 100644 index 00000000..d2eb9a15 --- /dev/null +++ b/_gsocorgs/2026/cmu.md @@ -0,0 +1,13 @@ +--- +title: "Carnegie Mellon University" +author: "Emanuele Usai" +layout: default +organization: CMU +logo: 800px-Carnegie_Mellon_University_seal.svg.png +description: | + Carnegie Mellon University is a private research university based in Pittsburgh, Pennsylvania. + + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/cornell.md b/_gsocorgs/2026/cornell.md new file mode 100644 index 00000000..0e889c15 --- /dev/null +++ b/_gsocorgs/2026/cornell.md @@ -0,0 +1,11 @@ +--- +title: "Cornell University" +author: "Emanuele Usai" +layout: default +organization: Cornell +logo: 768px-Cornell_University_seal.svg.png +description: | + Cornell University is a private, statutory, Ivy League and land-grant research university in Ithaca, New York. +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/dartmouth.md b/_gsocorgs/2026/dartmouth.md new file mode 100644 index 00000000..f375a696 --- /dev/null +++ b/_gsocorgs/2026/dartmouth.md @@ -0,0 +1,11 @@ +--- +title: "Dartmouth College" +author: "Emanuele Usai" +layout: default +organization: Dartmouth +logo: 717px-Dartmouth_College_shield.svg.png +description: | + Dartmouth College is a private Ivy League research university in Hanover, New Hampshire, United States. 
+--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/davidson.md b/_gsocorgs/2026/davidson.md new file mode 100644 index 00000000..58d673ba --- /dev/null +++ b/_gsocorgs/2026/davidson.md @@ -0,0 +1,13 @@ +--- +title: "Davidson College" +author: "Emanuele Usai" +layout: default +organization: Davidson +logo: Davidson_College_seal.png +description: | + Davidson College is a private liberal arts college in Davidson, North Carolina. + + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/epfl.md b/_gsocorgs/2026/epfl.md new file mode 100644 index 00000000..87d62db7 --- /dev/null +++ b/_gsocorgs/2026/epfl.md @@ -0,0 +1,12 @@ +--- +title: "EPFL" +author: "Emanuele Usai" +layout: default +organization: EPFL +logo: 1024px-Logo_EPFL.svg.png +description: | + The École polytechnique fédérale de Lausanne (EPFL) is a public research university located in Lausanne, Switzerland. It specializes in natural sciences and engineering. It is one of the two Swiss Federal Institutes of Technology, with three main missions: education, research and innovation. + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/erlangen.md b/_gsocorgs/2026/erlangen.md new file mode 100644 index 00000000..0ad39969 --- /dev/null +++ b/_gsocorgs/2026/erlangen.md @@ -0,0 +1,12 @@ +--- +title: "University of Erlangen–Nuremberg" +author: "Omar Zapata" +layout: default +organization: Erlangen +logo: erlangen.png +description: | + University of Erlangen–Nuremberg is a public research university in the cities of Erlangen and Nuremberg in Bavaria, Germany. FAU is a member of the German Research Foundation DFG (Deutsche Forschungsgemeinschaft). 
+ +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/fau.md b/_gsocorgs/2026/fau.md new file mode 100644 index 00000000..40de1071 --- /dev/null +++ b/_gsocorgs/2026/fau.md @@ -0,0 +1,12 @@ +--- +title: "University of Erlangen-Nuremberg" +author: "Eric Reinhardt" +layout: default +organization: University of Erlangen-Nuremberg +logo: FAU-logo.jpg +description: | + [University of Erlangen-Nuremberg](https://www.fau.eu/) is a public university in Bavaria, Germany. Founded in 1742, the University's main campuses are located in Erlangen and Nuremberg in Bavaria, Germany with a newer campus opened in 2009 in Busan, South Korea. + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/florida.md b/_gsocorgs/2026/florida.md new file mode 100644 index 00000000..2a76c772 --- /dev/null +++ b/_gsocorgs/2026/florida.md @@ -0,0 +1,11 @@ +--- +title: "University of Florida" +author: "Omar Zapata" +layout: default +organization: Florida +logo: UFL-logo.png +description: | + University of Florida is a public institution that was founded in 1853. +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/fsu.md b/_gsocorgs/2026/fsu.md new file mode 100644 index 00000000..5a853699 --- /dev/null +++ b/_gsocorgs/2026/fsu.md @@ -0,0 +1,11 @@ +--- +title: "Florida State University" +author: "Omar Zapata" +layout: default +organization: FSU +logo: FSU-logo.png +description: | + Florida State University is a public institution that was founded in 1851. 
+--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/georgia.md b/_gsocorgs/2026/georgia.md new file mode 100644 index 00000000..2300d806 --- /dev/null +++ b/_gsocorgs/2026/georgia.md @@ -0,0 +1,11 @@ +--- +title: "University of Georgia" +author: "Emanuele Usai" +layout: default +organization: Georgia +logo: 800px-University_of_Georgia_seal.svg.png +description: | + The University of Georgia (UGA or Georgia) is a public land-grant research university with its main campus in Athens, Georgia. +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/goddard.md b/_gsocorgs/2026/goddard.md new file mode 100644 index 00000000..be2d1830 --- /dev/null +++ b/_gsocorgs/2026/goddard.md @@ -0,0 +1,13 @@ +--- +title: "NASA Goddard Space Flight Center" +author: "Emanuele Usai" +layout: default +organization: Goddard +logo: NASA_logo.svg.png +description: | + The Goddard Space Flight Center (GSFC) is a major NASA space research laboratory located approximately 6.5 miles (10.5 km) northeast of Washington, D.C. in Greenbelt, Maryland, United States. + + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/jhuapl.md b/_gsocorgs/2026/jhuapl.md new file mode 100644 index 00000000..fa090fa4 --- /dev/null +++ b/_gsocorgs/2026/jhuapl.md @@ -0,0 +1,13 @@ +--- +title: "Johns Hopkins University Applied Physics Laboratory" +author: "Omar Zapata" +layout: default +organization: JHUAPL +logo: JHUAPL-logo.png +description: | + The Johns Hopkins University Applied Physics Laboratory (APL) has provided critical contributions to critical challenges with systems engineering and integration, technology research and development, and analysis. 
+ + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/kansas.md b/_gsocorgs/2026/kansas.md new file mode 100644 index 00000000..fd612a76 --- /dev/null +++ b/_gsocorgs/2026/kansas.md @@ -0,0 +1,12 @@ +--- +title: "University of Kansas" +author: "Omar Zapata" +layout: default +organization: Kansas +logo: kansas.png +description: | + The University of Kansas (KU) is a public research university with its main campus in Lawrence, Kansas. The university is a member of the Association of American Universities and is classified among "R1: Doctoral Universities – Very high research activity". Founded March 21, 1865, the university was opened in 1866 under a charter granted by the Kansas State Legislature in 1864. + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/kentucky.md b/_gsocorgs/2026/kentucky.md new file mode 100644 index 00000000..e3bfd3aa --- /dev/null +++ b/_gsocorgs/2026/kentucky.md @@ -0,0 +1,13 @@ +--- +title: "University of Kentucky" +author: "Emanuele Usai" +layout: default +organization: Kentucky +logo: 800px-University_of_Kentucky_seal.svg.png +description: | + The University of Kentucky (UK or UKY) is a public land-grant research university in Lexington, Kentucky. + + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/kgi.md b/_gsocorgs/2026/kgi.md new file mode 100644 index 00000000..e47285fc --- /dev/null +++ b/_gsocorgs/2026/kgi.md @@ -0,0 +1,13 @@ +--- +title: "Keck Graduate Institute" +author: "Emanuele Usai" +layout: default +organization: KGI +logo: KGI_logo_2018.png +description: | + Keck Graduate Institute (KGI) is a private graduate school in Claremont, California. 
+ + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/lanl.md b/_gsocorgs/2026/lanl.md new file mode 100644 index 00000000..f2004f23 --- /dev/null +++ b/_gsocorgs/2026/lanl.md @@ -0,0 +1,13 @@ +--- +title: "Los Alamos National Laboratory" +author: "Emanuele Usai" +layout: default +organization: LANL +logo: 1280px-Los_Alamos_logo.svg.png +description: | + Los Alamos National Laboratory (Los Alamos or LANL for short) is a United States Department of Energy national laboratory. + + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/leeds.md b/_gsocorgs/2026/leeds.md new file mode 100644 index 00000000..a840ddee --- /dev/null +++ b/_gsocorgs/2026/leeds.md @@ -0,0 +1,11 @@ +--- +title: "University of Leeds" +author: "Emanuele Usai" +layout: default +organization: Leeds +logo: 545px-University_of_Leeds_crest.svg.png +description: | + The University of Leeds is a public research university in Leeds, West Yorkshire, England. +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/mathworks.md b/_gsocorgs/2026/mathworks.md new file mode 100644 index 00000000..ff1edd96 --- /dev/null +++ b/_gsocorgs/2026/mathworks.md @@ -0,0 +1,11 @@ +--- +title: "MathWorks" +author: "Eric Reinhardt" +layout: default +organization: MathWorks +logo: MathWorks-logo.png +description: | + MathWorks is the leading developer of mathematical computing software for engineers and scientists. 
+--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/middle east technical university.md b/_gsocorgs/2026/middle east technical university.md new file mode 100644 index 00000000..39362762 --- /dev/null +++ b/_gsocorgs/2026/middle east technical university.md @@ -0,0 +1,12 @@ +--- +title: "Middle East Technical University" +author: "Emanuele Usai" +layout: default +organization: Middle East Technical University +logo: 909px-Logo_of_METU.svg.png +description: | + Middle East Technical University (commonly referred to as METU) is a public technical university located in Ankara, Turkey. + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/mit.md b/_gsocorgs/2026/mit.md new file mode 100644 index 00000000..c67e9b4e --- /dev/null +++ b/_gsocorgs/2026/mit.md @@ -0,0 +1,11 @@ +--- +title: "Massachusetts Institute of Technology" +author: "Eric Reinhardt" +layout: default +organization: MIT +logo: MIT-logo.png +description: | + The Massachusetts Institute of Technology (MIT) is a private land-grant research university in Cambridge, Massachusetts established in 1861. +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/new york university.md b/_gsocorgs/2026/new york university.md new file mode 100644 index 00000000..2959870a --- /dev/null +++ b/_gsocorgs/2026/new york university.md @@ -0,0 +1,12 @@ +--- +title: "New York University" +author: "Emanuele Usai" +layout: default +organization: New York University +logo: New_York_University_Seal.svg.png +description: | + New York University (NYU) is a private research university in New York City. Chartered in 1831 by the New York State Legislature, NYU was founded by a group of New Yorkers led by then Secretary of the Treasury Albert Gallatin. 
+ +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/niser.md b/_gsocorgs/2026/niser.md new file mode 100644 index 00000000..982c6dbf --- /dev/null +++ b/_gsocorgs/2026/niser.md @@ -0,0 +1,12 @@ +--- +title: "NISER" +author: "Emanuele Usai" +layout: default +organization: NISER +logo: NISER_odisha.png +description: | + The National Institute of Science Education and Research (NISER) is a public research institute in Bhubaneswar, Odisha, India. Founded in 2006, it was ranked second in the country by the Nature Index 2020. + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/ntua.md b/_gsocorgs/2026/ntua.md new file mode 100644 index 00000000..c3b3e9d0 --- /dev/null +++ b/_gsocorgs/2026/ntua.md @@ -0,0 +1,12 @@ +--- +title: "NTUA" +author: "Emanuele Usai" +layout: default +organization: NTUA +logo: Pyrforos2.png +description: | + The National (Metsovian) Technical University of Athens, sometimes known as Athens Polytechnic, is among the oldest higher education institutions of Greece and the most prestigious among engineering schools. + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/paris.md b/_gsocorgs/2026/paris.md new file mode 100644 index 00000000..d6a48614 --- /dev/null +++ b/_gsocorgs/2026/paris.md @@ -0,0 +1,12 @@ +--- +title: "Institut polytechnique de Paris" +author: "Emanuele Usai" +layout: default +organization: Paris +logo: Institut_polytechnique_de_Paris.png +description: | + The Polytechnic Institute of Paris is a research university system located in Palaiseau, France. 
+ +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/princess sumaya university for technology.md b/_gsocorgs/2026/princess sumaya university for technology.md new file mode 100644 index 00000000..9c77359c --- /dev/null +++ b/_gsocorgs/2026/princess sumaya university for technology.md @@ -0,0 +1,12 @@ +--- +title: "Princess Sumaya University for Technology" +author: "Emanuele Usai" +layout: default +organization: Princess Sumaya University for Technology +logo: Princess_Sumaya_University_for_Technology_logo.png +description: | + Princess Sumaya University for Technology (PSUT), established in 1991, is a specialized, Non-governmental, Non-profit, Jordanian university, owned by the leading applied research centre in Jordan, the Royal Scientific Society (RSS). + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/qassim.md b/_gsocorgs/2026/qassim.md new file mode 100644 index 00000000..9c2b8c3f --- /dev/null +++ b/_gsocorgs/2026/qassim.md @@ -0,0 +1,12 @@ +--- +title: "Qassim University" +author: "Eric Reinhardt" +layout: default +organization: Qassim University +logo: QU-logo.jpg +description: | + [Qassim University](https://www.qu.edu.sa/) is a major public university in Qassim, Saudi Arabia. It's one of the top ranking and largest universities in Saudi Arabia with over 50,000 students enrolled. + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/rwth.md b/_gsocorgs/2026/rwth.md new file mode 100644 index 00000000..62a459b2 --- /dev/null +++ b/_gsocorgs/2026/rwth.md @@ -0,0 +1,12 @@ +--- +title: "RWTH Aachen University" +author: "Emanuele Usai" +layout: default +organization: RWTH +logo: RWTH_Logo_3.svg.png +description: | + RWTH Aachen University is a German public research university located in Aachen, North Rhine-Westphalia, Germany. 
+ +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/southcarolina.md b/_gsocorgs/2026/southcarolina.md new file mode 100644 index 00000000..b77f3f98 --- /dev/null +++ b/_gsocorgs/2026/southcarolina.md @@ -0,0 +1,11 @@ +--- +title: "University of South Carolina" +author: "Emanuele Usai" +layout: default +organization: SouthCarolina +logo: 800px-University_of_South_Carolina_seal.svg.png +description: | + The University of South Carolina (USC, UofSC, SC, or simply Carolina) is a public research university in Columbia, South Carolina. +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/tum.md b/_gsocorgs/2026/tum.md new file mode 100644 index 00000000..b3317bf9 --- /dev/null +++ b/_gsocorgs/2026/tum.md @@ -0,0 +1,12 @@ +--- +title: "Technical University of Munich" +author: "Emanuele Usai" +layout: default +organization: TUM +logo: Logo_of_the_Technical_University_of_Munich.svg.png +description: | + The Technical University of Munich is a public research university in Munich, Germany. It specializes in engineering, technology, medicine, and applied and natural sciences. + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/vit.md b/_gsocorgs/2026/vit.md new file mode 100644 index 00000000..6226d3d5 --- /dev/null +++ b/_gsocorgs/2026/vit.md @@ -0,0 +1,12 @@ +--- +title: "VIT" +author: "Emanuele Usai" +layout: default +organization: VIT +logo: Vishwakarma_Institute_of_Technology.png +description: | + Vishwakarma Institute of Technology (VIT) is an autonomous institute in Pune, Maharashtra, India. Established in 1983, the institute is affiliated with the Savitribai Phule Pune University and run by the Bansilal Ramnath Agarwal Charitable Trust. 
+ +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/washington.md b/_gsocorgs/2026/washington.md new file mode 100644 index 00000000..9e844f5f --- /dev/null +++ b/_gsocorgs/2026/washington.md @@ -0,0 +1,12 @@ +--- +title: "University of Washington" +author: "Emanuele Usai" +layout: default +organization: Washington +logo: University_of_Washington_seal.svg.png +description: | + The University of Washington is a public research university in Seattle, Washington. + +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocorgs/2026/wisconsin.md b/_gsocorgs/2026/wisconsin.md new file mode 100644 index 00000000..e7c0302b --- /dev/null +++ b/_gsocorgs/2026/wisconsin.md @@ -0,0 +1,11 @@ +--- +title: "University of Wisconsin–Madison" +author: "Shaojun Sun" +layout: default +organization: Wisconsin +logo: WISCONSIN-logo.png +description: | + The [University of Wisconsin-Madison](https://www.wisc.edu/) is a public research university in Madison, Wisconsin. Since its founding in 1848, this campus has been a catalyst for the extraordinary. +--- + +{% include gsoc_proposal.ext %} diff --git a/_gsocprojects/2026/project_CMS.md b/_gsocprojects/2026/project_CMS.md new file mode 100644 index 00000000..c1c3ee76 --- /dev/null +++ b/_gsocprojects/2026/project_CMS.md @@ -0,0 +1,9 @@ +--- +project: CMS +layout: default +logo: CMS-logo.png +description: | + [CMS](http://cms.cern/) is a high-energy physics experiment at the [Large Hadron Collider](http://home.web.cern.ch/topics/large-hadron-collider) (LHC) at [CERN](http://home.cern/). It is a general-purpose detector that is designed to observe any new physics phenomena that the LHC might reveal. CMS acts as a giant, high-speed camera, taking 3D "photographs" of particle collisions from all directions up to 40 million times each second. CMS collects a few tens of petabytes of data each year and processes them through the Worldwide LHC Computing Grid infrastructure around the globe. 
+--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_DEEPLENSE.md b/_gsocprojects/2026/project_DEEPLENSE.md new file mode 100644 index 00000000..6352e885 --- /dev/null +++ b/_gsocprojects/2026/project_DEEPLENSE.md @@ -0,0 +1,10 @@ +--- + +project: DEEPLENSE +layout: default +logo: DEEPLENSE-logo.png +description: | + [DeepLense](https://arxiv.org/abs/1909.07346) is a deep learning pipeline for particle dark matter searches with strong gravitational lensing. +--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_E2E.md b/_gsocprojects/2026/project_E2E.md new file mode 100644 index 00000000..59cde15f --- /dev/null +++ b/_gsocprojects/2026/project_E2E.md @@ -0,0 +1,10 @@ +--- +project: E2E +layout: default +logo: E2E-logo.png +description: | + The [End-to-End Deep Learning (E2E)](https://arxiv.org/abs/1807.11916) project focuses on the development of particle and event reconstruction and identification tasks with end-to-end deep learning approaches. +--- + + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_EXXA.md b/_gsocprojects/2026/project_EXXA.md new file mode 100644 index 00000000..43b963fa --- /dev/null +++ b/_gsocprojects/2026/project_EXXA.md @@ -0,0 +1,9 @@ +--- +project: EXXA +layout: default +logo: exxa.png +description: | + The purpose of EXXA is to use simulations and publicly available data from observations intended to identify exoplanets and physical processes in planet-forming environments. 
+--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_FALCON.md b/_gsocprojects/2026/project_FALCON.md new file mode 100644 index 00000000..caac41b2 --- /dev/null +++ b/_gsocprojects/2026/project_FALCON.md @@ -0,0 +1,10 @@ +--- + +project: DeepFALCON +layout: default +logo: FALCON-logo.png +description: | + DeepFalcon is an ultra-fast non-parametric detector simulation package that automatically abstracts detector response, usually done by hand in fast-simulators used by particle physics experiments. +--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_FASEROH.md b/_gsocprojects/2026/project_FASEROH.md new file mode 100644 index 00000000..17306a51 --- /dev/null +++ b/_gsocprojects/2026/project_FASEROH.md @@ -0,0 +1,9 @@ +--- +project: FASEROH +layout: default +logo: FASEROH.png +description: | + FASEROH is an open source project researching a seq2seq model that maps histograms to empirical symbolic representations. +--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_GENIE.md b/_gsocprojects/2026/project_GENIE.md new file mode 100644 index 00000000..70479780 --- /dev/null +++ b/_gsocprojects/2026/project_GENIE.md @@ -0,0 +1,9 @@ +--- +project: GENIE +layout: default +logo: genie-logo.png +description: | + The GENIE project focuses on the application of novel machine learning techniques to anomaly detection and event generation in particle physics. +--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_LOX.md b/_gsocprojects/2026/project_LOX.md new file mode 100644 index 00000000..77e4dee2 --- /dev/null +++ b/_gsocprojects/2026/project_LOX.md @@ -0,0 +1,10 @@ +--- +project: LOX +layout: default +logo: 588132f9b3cb3.image_.jpg +description: | + The Lunar Occultation Explorer (LOX) is a lunar-orbiting nuclear astrophysics mission that will probe the Cosmos at MeV energies. 
+--- + + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_LUNARPROSPECTOR.md b/_gsocprojects/2026/project_LUNARPROSPECTOR.md new file mode 100644 index 00000000..5b95c691 --- /dev/null +++ b/_gsocprojects/2026/project_LUNARPROSPECTOR.md @@ -0,0 +1,9 @@ +--- +project: Lunar Prospector +layout: default +logo: Artist-rendering-spacecraft-Lunar-Prospector.jpg +description: | + Lunar Prospector was the third mission selected by NASA for full development and construction as part of the Discovery Program. The 19-month mission was designed for a low polar orbit investigation of the Moon, including mapping of surface composition including polar ice deposits, measurements of magnetic and gravity fields, and study of lunar outgassing events. The mission ended July 31, 1999, when the orbiter was deliberately crashed into a crater near the lunar south pole, after the presence of water ice was successfully detected. +--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_MESSENGER.md b/_gsocprojects/2026/project_MESSENGER.md new file mode 100644 index 00000000..75ac8aba --- /dev/null +++ b/_gsocprojects/2026/project_MESSENGER.md @@ -0,0 +1,9 @@ +--- +project: MESSENGER +layout: default +logo: 800px-MESSENGER.jpg +description: | + NASA's MESSENGER spacecraft orbited Mercury for more than four years. Among its accomplishments, the mission determined Mercury’s surface composition, revealed its geological history, discovered details about its internal magnetic field, and verified its polar deposits are dominantly water-ice. The mission ended when MESSENGER slammed into Mercury’s surface. 
+--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_ML4DQM.md b/_gsocprojects/2026/project_ML4DQM.md new file mode 100644 index 00000000..403b1138 --- /dev/null +++ b/_gsocprojects/2026/project_ML4DQM.md @@ -0,0 +1,11 @@ +--- +project: ML4DQM +layout: default +logo: ml4dqm.jpg +description: | + + + Data Quality Monitoring (DQM) is an important aspect of every high-energy physics experiment needed to avoid taking low-quality data. The goal of DQM is to track important information about the detector and the data and catch problems in real time. This monitoring happens both online and offline to ensure optimal operation of the experiment. The goal of the ML4DQM project is to use machine learning to aid human shifters with identification of anomalies to help make better decisions about the quality of the data. +--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_NEURODYAD.md b/_gsocprojects/2026/project_NEURODYAD.md new file mode 100644 index 00000000..739ed482 --- /dev/null +++ b/_gsocprojects/2026/project_NEURODYAD.md @@ -0,0 +1,9 @@ +--- +project: NEURODYAD +layout: default +logo: NeuroDyad.png +description: | + This project aims to develop a computational pipeline using the Contrastive Embedding for Behavioral and Neural Analysis (CEBRA) method to analyze time-locked EEG data from interacting participants. +--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_QMLHEP.md b/_gsocprojects/2026/project_QMLHEP.md new file mode 100644 index 00000000..04d65cd4 --- /dev/null +++ b/_gsocprojects/2026/project_QMLHEP.md @@ -0,0 +1,9 @@ +--- +project: QMLHEP +layout: default +logo: QMLHEP-logo.png +description: | + The QMLHEP project implements Quantum Machine Learning methods for physics analysis. 
+--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_SYMBA.md b/_gsocprojects/2026/project_SYMBA.md new file mode 100644 index 00000000..8dc50920 --- /dev/null +++ b/_gsocprojects/2026/project_SYMBA.md @@ -0,0 +1,9 @@ +--- +project: SYMBA +layout: default +logo: SYMBA.png +description: | + SYMBA is an open source project researching symbolic machine learning techniques to predict the squared amplitudes and cross section for high-energy physics. +--- + +{% include gsoc_project.ext %} diff --git a/_gsocprojects/2026/project_SYMMETRY.md b/_gsocprojects/2026/project_SYMMETRY.md new file mode 100644 index 00000000..d8057771 --- /dev/null +++ b/_gsocprojects/2026/project_SYMMETRY.md @@ -0,0 +1,8 @@ +--- +project: SYMMETRY +layout: default +logo: SYMMETRY.png +description: Recent success in the domain of unsupervised and semi-supervised learning has been lately a pivotal factor for development of Physics Aware and Symmetry Aware Machine Learning techniques where a model learns the symmetry of a dataset as a meta task and ends up learning the physics through the same. This project will focus on ways to learn the symmetries using semi-supervised approaches using CMS data. +--- + +{% include gsoc_project.ext %} diff --git a/_gsocproposals/2025/proposal_CMS1.md b/_gsocproposals/2025/proposal_CMS1.md index f7276167..622c1785 100644 --- a/_gsocproposals/2025/proposal_CMS1.md +++ b/_gsocproposals/2025/proposal_CMS1.md @@ -21,8 +21,10 @@ Total project length: 175/350 hours. * Improve existing code pipeline with features like multi-GPU parallelism and flexible preprocessing and analysis options * Deploy the developed models and pipeline on simulated physics data and analyze performance gains and changes in model understanding from the techniques used + ## Requirements Significant experience in Python and Machine Learning in Pytorch. Preferably some experience with Transformers and multi-GPU parallelization or with the ROOT library developed by CERN. 
diff --git a/_gsocproposals/2025/proposal_CMS2.md b/_gsocproposals/2025/proposal_CMS2.md index a3069b88..eb33ad0e 100644 --- a/_gsocproposals/2025/proposal_CMS2.md +++ b/_gsocproposals/2025/proposal_CMS2.md @@ -25,8 +25,10 @@ Total project length: 175/350 hours. ## Expected results * Trained models and benchmarks on simulated data + ## Requirements Python, C++, and some previous experience in Machine Learning. diff --git a/_gsocproposals/2025/proposal_DEEPLENSE1.md b/_gsocproposals/2025/proposal_DEEPLENSE1.md index ff64473e..d288f830 100644 --- a/_gsocproposals/2025/proposal_DEEPLENSE1.md +++ b/_gsocproposals/2025/proposal_DEEPLENSE1.md @@ -38,8 +38,10 @@ Advanced * Python, PyTorch, experience with machine learning, and familiarity with computer vision techniques. * Understanding of self-supervised learning, representation learning, and deep learning architectures. + ## Mentors * [Michael Toomey](mailto:ml4-sci@cern.ch) (Massachusetts Institute of Technology) diff --git a/_gsocproposals/2025/proposal_DEEPLENSE2.md b/_gsocproposals/2025/proposal_DEEPLENSE2.md index 32115426..fb26d928 100644 --- a/_gsocproposals/2025/proposal_DEEPLENSE2.md +++ b/_gsocproposals/2025/proposal_DEEPLENSE2.md @@ -30,8 +30,10 @@ Intermediate/Advanced ## Requirements Python, PyTorch, experience with machine learning, knowledge of computer vision techniques, familiarity with autoencoders. + ## Mentors * [Michael Toomey](mailto:ml4-sci@cern.ch) (Massachusetts Institute of Technology) diff --git a/_gsocproposals/2025/proposal_DEEPLENSE3.md b/_gsocproposals/2025/proposal_DEEPLENSE3.md index dbaa486b..73cf6d96 100644 --- a/_gsocproposals/2025/proposal_DEEPLENSE3.md +++ b/_gsocproposals/2025/proposal_DEEPLENSE3.md @@ -32,8 +32,10 @@ Intermediate/Advanced ## Requirements Python, PyTorch, experience with machine learning, knowledge of computer vision techniques, familiarity with autoencoders. 
+ ## Mentors * [Michael Toomey](mailto:ml4-sci@cern.ch) (Massachusetts Institute of Technology) diff --git a/_gsocproposals/2025/proposal_DEEPLENSE4.md b/_gsocproposals/2025/proposal_DEEPLENSE4.md index 3c8fef7d..ed94e2bb 100644 --- a/_gsocproposals/2025/proposal_DEEPLENSE4.md +++ b/_gsocproposals/2025/proposal_DEEPLENSE4.md @@ -35,8 +35,10 @@ Intermediate/Advanced ## Requirements Python, PyTorch, experience with machine learning, familiarity with astrophysics datasets. + ## Mentors * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) diff --git a/_gsocproposals/2025/proposal_DEEPLENSE5.md b/_gsocproposals/2025/proposal_DEEPLENSE5.md index 2bf2cbc2..7546570e 100644 --- a/_gsocproposals/2025/proposal_DEEPLENSE5.md +++ b/_gsocproposals/2025/proposal_DEEPLENSE5.md @@ -36,8 +36,10 @@ Intermediate/Advanced * Python, PyTorch and relevant past experience in Machine Learning. * Familiarity with astrophysics and gravitational lensing is preferred but not required. + ## Mentors * [Michael Toomey](mailto:ml4-sci@cern.ch) (Massachusetts Institute of Technology) diff --git a/_gsocproposals/2025/proposal_DEEPLENSE6.md b/_gsocproposals/2025/proposal_DEEPLENSE6.md index d25c562b..6e0e67b9 100644 --- a/_gsocproposals/2025/proposal_DEEPLENSE6.md +++ b/_gsocproposals/2025/proposal_DEEPLENSE6.md @@ -33,8 +33,10 @@ Intermediate/Advanced ## Requirements Python, familiarity with astronomical data processing, and understanding of data access APIs and pipelines. + ## Mentors * [Michael Toomey](mailto:ml4-sci@cern.ch) (Massachusetts Institute of Technology) diff --git a/_gsocproposals/2025/proposal_E2E2.md b/_gsocproposals/2025/proposal_E2E2.md index 37b29e8d..41f3b7d5 100644 --- a/_gsocproposals/2025/proposal_E2E2.md +++ b/_gsocproposals/2025/proposal_E2E2.md @@ -34,8 +34,10 @@ Advanced * Significant experience in Python and Machine Learning in Pytorch. * Preferably some experience with Transformers and multi-GPU parallelization or with the ROOT library developed by CERN. 
+ ## Mentors * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) diff --git a/_gsocproposals/2025/proposal_E2E3.md b/_gsocproposals/2025/proposal_E2E3.md index 604f6d6d..235a34fd 100644 --- a/_gsocproposals/2025/proposal_E2E3.md +++ b/_gsocproposals/2025/proposal_E2E3.md @@ -37,8 +37,10 @@ Intermediate ## Requirements Python, PyTorch and some previous experience in Machine Learning. + ## Mentors * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) diff --git a/_gsocproposals/2025/proposal_E2E4.md b/_gsocproposals/2025/proposal_E2E4.md index efa9bb29..53e14ce5 100644 --- a/_gsocproposals/2025/proposal_E2E4.md +++ b/_gsocproposals/2025/proposal_E2E4.md @@ -34,8 +34,10 @@ Intermediate ## Requirements C++, Python, PyTorch and some previous experience in Machine Learning. + ## Mentors * [Ruchi Chudasama](mailto:ml4-sci@cern.ch) (University of Alabama) diff --git a/_gsocproposals/2025/proposal_E2E5.md b/_gsocproposals/2025/proposal_E2E5.md index cc310ea7..02c7cb80 100644 --- a/_gsocproposals/2025/proposal_E2E5.md +++ b/_gsocproposals/2025/proposal_E2E5.md @@ -41,8 +41,10 @@ Intermediate C++, Python, PyTorch and some previous experience in Machine Learning. + ## Mentors diff --git a/_gsocproposals/2025/proposal_E2E6.md b/_gsocproposals/2025/proposal_E2E6.md index 8d9a34b2..653139ad 100644 --- a/_gsocproposals/2025/proposal_E2E6.md +++ b/_gsocproposals/2025/proposal_E2E6.md @@ -45,8 +45,10 @@ Intermediate C++, Python, PyTorch and some previous experience in Machine Learning. + ## Mentors diff --git a/_gsocproposals/2025/proposal_E2E7.md b/_gsocproposals/2025/proposal_E2E7.md index 2b699080..ba852016 100644 --- a/_gsocproposals/2025/proposal_E2E7.md +++ b/_gsocproposals/2025/proposal_E2E7.md @@ -42,8 +42,10 @@ Advanced C++, Python, PyTorch and some previous experience in Machine Learning. 
+ ## Mentors diff --git a/_gsocproposals/2025/proposal_E2E8.md b/_gsocproposals/2025/proposal_E2E8.md index 46d12b1a..72c8932d 100644 --- a/_gsocproposals/2025/proposal_E2E8.md +++ b/_gsocproposals/2025/proposal_E2E8.md @@ -41,8 +41,10 @@ Advanced * Experience with PyTorch and TensorFlow * Previous experience in Deep Learning + ## Mentors * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) diff --git a/_gsocproposals/2025/proposal_FALCON1.md b/_gsocproposals/2025/proposal_FALCON1.md index b724e0a7..6e5a34d9 100644 --- a/_gsocproposals/2025/proposal_FALCON1.md +++ b/_gsocproposals/2025/proposal_FALCON1.md @@ -22,8 +22,10 @@ organization: Total project length: 175/350 hours. + ## Requirements diff --git a/_gsocproposals/2025/proposal_FALCON2.md b/_gsocproposals/2025/proposal_FALCON2.md index a86c57d7..6a867ceb 100644 --- a/_gsocproposals/2025/proposal_FALCON2.md +++ b/_gsocproposals/2025/proposal_FALCON2.md @@ -23,8 +23,10 @@ organization: Total project length: 175/350 hours. + ## Requirements Strong machine learning skills, good knowledge of C++ and Python. Interest in Machine Learning algorithms and applications. diff --git a/_gsocproposals/2025/proposal_FALCON3.md b/_gsocproposals/2025/proposal_FALCON3.md index d13cad59..8a5f6afa 100644 --- a/_gsocproposals/2025/proposal_FALCON3.md +++ b/_gsocproposals/2025/proposal_FALCON3.md @@ -21,8 +21,10 @@ organization: Total project length: 175/350 hours. + ## Requirements Strong machine learning skills, good knowledge of C++ and Python. Interest in Machine Learning algorithms and applications. diff --git a/_gsocproposals/2025/proposal_FALCON4.md b/_gsocproposals/2025/proposal_FALCON4.md index 33eb8a0e..6b2fc43a 100644 --- a/_gsocproposals/2025/proposal_FALCON4.md +++ b/_gsocproposals/2025/proposal_FALCON4.md @@ -22,8 +22,10 @@ Total project length: 175/350 hours. 
## Task ideas and expected results Application of optimal transport techniques using deep learning for classification, anomaly detection and graph generative models in high energy physics. + ## Requirements Strong machine learning skills, good knowledge of C++ and Python. Interest in Machine Learning algorithms and applications. diff --git a/_gsocproposals/2025/proposal_FASEROH.md b/_gsocproposals/2025/proposal_FASEROH.md index e4bda89d..bb239d5d 100644 --- a/_gsocproposals/2025/proposal_FASEROH.md +++ b/_gsocproposals/2025/proposal_FASEROH.md @@ -26,8 +26,10 @@ Total project length: 175/350 hours. ## Requirements Python, previous experience in Machine Learning. + ## Mentors diff --git a/_gsocproposals/2025/proposal_NeuroDyad1.md b/_gsocproposals/2025/proposal_NeuroDyad1.md index 6161a3e1..a333686c 100644 --- a/_gsocproposals/2025/proposal_NeuroDyad1.md +++ b/_gsocproposals/2025/proposal_NeuroDyad1.md @@ -26,8 +26,10 @@ Total project length: 175/350 hours. * Train the model on neurotypical and clinical dyads, analyzing how latent embeddings encode neural interactions. * Identify differentiating features (e.g., variance in manifold structure, neural synchrony, connectivity patterns) that may characterize clinical interactions. + ## Requirements MATLAB, Python; solid understanding of linear algebra, topology, signal processing. Interest in clinical neuroscience and computational methods. diff --git a/_gsocproposals/2025/proposal_QMLHEP1.md b/_gsocproposals/2025/proposal_QMLHEP1.md index ae6bd531..eadc4c95 100644 --- a/_gsocproposals/2025/proposal_QMLHEP1.md +++ b/_gsocproposals/2025/proposal_QMLHEP1.md @@ -37,8 +37,10 @@ Total project length: 175/350 hours. * Demonstrate that the QGAN can produce samples whose statistical properties match those of the real dataset. 
+ ## Requirements * Solid knowledge of machine learning and deep learning diff --git a/_gsocproposals/2025/proposal_QMLHEP10.md b/_gsocproposals/2025/proposal_QMLHEP10.md index ee52b8cc..a52af234 100644 --- a/_gsocproposals/2025/proposal_QMLHEP10.md +++ b/_gsocproposals/2025/proposal_QMLHEP10.md @@ -29,8 +29,10 @@ Total project length: 175 hours. + ## Requirements * Solid knowledge of machine learning and deep learning diff --git a/_gsocproposals/2025/proposal_QMLHEP11.md b/_gsocproposals/2025/proposal_QMLHEP11.md index bccf6502..fcc7f002 100644 --- a/_gsocproposals/2025/proposal_QMLHEP11.md +++ b/_gsocproposals/2025/proposal_QMLHEP11.md @@ -29,8 +29,10 @@ Total project length: 175 hours. * Trained quantum foundation model * Benchmark of the performance on a HEP dataset compared against a classical reference model + ## Requirements * Strong background in Machine Learning & Deep Learning. diff --git a/_gsocproposals/2025/proposal_QMLHEP2.md b/_gsocproposals/2025/proposal_QMLHEP2.md index 9c8887e0..1041501d 100644 --- a/_gsocproposals/2025/proposal_QMLHEP2.md +++ b/_gsocproposals/2025/proposal_QMLHEP2.md @@ -25,8 +25,10 @@ Total project length: 175/350 hours. * Trained Quantum Graph Neural Network with e.g. Pennylane framework. * Apply the Quantum Machine Learning method to LHC physics analysis and compare to classical machine learning methods. + ## Requirements * Solid knowledge of machine learning and deep learning diff --git a/_gsocproposals/2025/proposal_QMLHEP4.md b/_gsocproposals/2025/proposal_QMLHEP4.md index de103575..fbdb8b30 100644 --- a/_gsocproposals/2025/proposal_QMLHEP4.md +++ b/_gsocproposals/2025/proposal_QMLHEP4.md @@ -25,8 +25,10 @@ Total project length: 175/350 hours. * Trained equivariant quantum neural networks with a QML framework (PennyLane, Cirq, Bloqade, etc.). 
* Benchmark of the performance against a non-equivariant model + ## Requirements * Solid knowledge of machine learning and deep learning diff --git a/_gsocproposals/2025/proposal_QMLHEP5.md b/_gsocproposals/2025/proposal_QMLHEP5.md index 76cb9d83..60dec4c5 100644 --- a/_gsocproposals/2025/proposal_QMLHEP5.md +++ b/_gsocproposals/2025/proposal_QMLHEP5.md @@ -27,8 +27,10 @@ Total project length: 175 hours. * Trained quantum transformer model. * Benchmark of the performance on a HEP dataset compared against a classical reference model + ## Requirements * Solid knowledge of machine learning and deep learning diff --git a/_gsocproposals/2025/proposal_QMLHEP6.md b/_gsocproposals/2025/proposal_QMLHEP6.md index 7c12b86f..9456c993 100644 --- a/_gsocproposals/2025/proposal_QMLHEP6.md +++ b/_gsocproposals/2025/proposal_QMLHEP6.md @@ -29,8 +29,10 @@ Total project length: 175/350 hours. * Benchmark of the performance against a standard encoding + ## Requirements * Solid knowledge of machine learning and deep learning diff --git a/_gsocproposals/2025/proposal_QMLHEP7.md b/_gsocproposals/2025/proposal_QMLHEP7.md index dc2334a4..877641cb 100644 --- a/_gsocproposals/2025/proposal_QMLHEP7.md +++ b/_gsocproposals/2025/proposal_QMLHEP7.md @@ -33,8 +33,10 @@ Total project length: 175 hours. + ## Requirements * Solid knowledge of machine learning and deep learning diff --git a/_gsocproposals/2025/proposal_QMLHEP8.md b/_gsocproposals/2025/proposal_QMLHEP8.md index 708c3462..02842ac7 100644 --- a/_gsocproposals/2025/proposal_QMLHEP8.md +++ b/_gsocproposals/2025/proposal_QMLHEP8.md @@ -37,8 +37,10 @@ Total project length: 175 hours. + ## Requirements * Strong background in Machine Learning & Deep Learning. diff --git a/_gsocproposals/2025/proposal_QMLHEP9.md b/_gsocproposals/2025/proposal_QMLHEP9.md index 656a3a74..3170b167 100644 --- a/_gsocproposals/2025/proposal_QMLHEP9.md +++ b/_gsocproposals/2025/proposal_QMLHEP9.md @@ -35,8 +35,10 @@ Total project length: 175 hours. 
+ ## Requirements * Strong background in Machine Learning & Deep Learning. diff --git a/_gsocproposals/2025/proposal_SYMBA1.md b/_gsocproposals/2025/proposal_SYMBA1.md index fd745854..fb736762 100644 --- a/_gsocproposals/2025/proposal_SYMBA1.md +++ b/_gsocproposals/2025/proposal_SYMBA1.md @@ -29,8 +29,10 @@ Significant experience with Transformer machine learning models in Python (prefe ## Difficulty Level Advanced + ## Mentors * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) diff --git a/_gsocproposals/2025/proposal_SYMBA2.md b/_gsocproposals/2025/proposal_SYMBA2.md index 3566eeb2..d59c43e2 100644 --- a/_gsocproposals/2025/proposal_SYMBA2.md +++ b/_gsocproposals/2025/proposal_SYMBA2.md @@ -27,8 +27,10 @@ Significant experience with machine learning models in Python (preferably using ## Difficulty Level Advanced + ## Mentors diff --git a/_gsocproposals/2025/proposal_SYMBA3.md b/_gsocproposals/2025/proposal_SYMBA3.md index 362b52c3..7b5b383a 100644 --- a/_gsocproposals/2025/proposal_SYMBA3.md +++ b/_gsocproposals/2025/proposal_SYMBA3.md @@ -27,8 +27,10 @@ Significant experience with Transformer machine learning models in Python (prefe ## Difficulty Level Intermediate + ## Mentors diff --git a/_gsocproposals/2025/proposal_SYMBA4.md b/_gsocproposals/2025/proposal_SYMBA4.md index 936fa17d..950843e3 100644 --- a/_gsocproposals/2025/proposal_SYMBA4.md +++ b/_gsocproposals/2025/proposal_SYMBA4.md @@ -27,8 +27,10 @@ Significant experience with developing foundational models in Python (preferably ## Difficulty Level Advanced + ## Mentors diff --git a/_gsocproposals/2025/proposal_SYMBA5.md b/_gsocproposals/2025/proposal_SYMBA5.md index 14085ff9..0a4fe1ee 100644 --- a/_gsocproposals/2025/proposal_SYMBA5.md +++ b/_gsocproposals/2025/proposal_SYMBA5.md @@ -27,8 +27,10 @@ Significant experience with Transformer machine learning models in Python (prefe ## Difficulty Level Advanced + ## Mentors diff --git a/_gsocproposals/2025/proposal_SYMBA6.md 
b/_gsocproposals/2025/proposal_SYMBA6.md index 1a0c8cd5..41f7da2d 100644 --- a/_gsocproposals/2025/proposal_SYMBA6.md +++ b/_gsocproposals/2025/proposal_SYMBA6.md @@ -31,8 +31,10 @@ Total project length: 175/350 hours. * Knowledge of physics and linear algebra is desired + diff --git a/_gsocproposals/2025/proposal_SYMBA7.md b/_gsocproposals/2025/proposal_SYMBA7.md index a70ebcbe..80eb920d 100644 --- a/_gsocproposals/2025/proposal_SYMBA7.md +++ b/_gsocproposals/2025/proposal_SYMBA7.md @@ -29,8 +29,10 @@ Total project length: 175/350 hours. * Some knowledge of physics and extensive knowledge of mathematics preferred. + diff --git a/_gsocproposals/2025/proposal_SYMMETRY1.md b/_gsocproposals/2025/proposal_SYMMETRY1.md index 4303ca1c..7d1cfe91 100644 --- a/_gsocproposals/2025/proposal_SYMMETRY1.md +++ b/_gsocproposals/2025/proposal_SYMMETRY1.md @@ -39,8 +39,10 @@ Advanced * Experience with PyTorch and TensorFlow * Previous experience in Deep Learning + ## Mentors * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) diff --git a/_gsocproposals/2026/README.md b/_gsocproposals/2026/README.md new file mode 100644 index 00000000..849dab63 --- /dev/null +++ b/_gsocproposals/2026/README.md @@ -0,0 +1,17 @@ +## To add a new proposal + +* Create a file `proposal_YOURPROJECTyourproposal.md` (look at this example: [`proposal_ROOTspark.md`](https://raw.githubusercontent.com/HSF/hsf.github.io/master/_gsocproposals/2018/proposal_ROOTspark.md)). The following sections are strongly suggested: + +* Under `## Description` write the description of the proposal in the context of your project. Try not to make it exhaustive, but rather readable and appealing for potential students. Note that the project duration has to be tuned for 175 hours projects (rather than 350, as in the previous years) + +* Under `## Task ideas` enumerate the main ideas for the tasks to be completed for the project to succeed. 
Keep in mind that the target is a summer student who is not familiar with your project and who will be working for 175 hours. Try to make these tasks realistic and concrete, targeting your project's main objectives - your future student will write a proposal with a plan of work built upon these. + +* Under `## Expected results` enumerate the main objectives that you want to achieve at the end of the summer project to consider the student's work successful. It is important to have a realistic and concrete target rather than generic and non-measurable objectives. + +* Under `## Evaluation Tasks` give pointers to the information needed by potential candidates to complete the tests required for being allowed to submit a proposal for your project. Do not write a direct link to the tests here; give this only to the students who expressed interest in your project. It is acceptable that in this section you only write the type of test (e.g. C++ algorithm), but you can be more specific if you need to. + +* Under `## Requirements` add all the mandatory skills you expect for completing the project. + +* Under `## Mentors` add the main mentor and at least one more co-mentor. It is required that a co-mentor is able to replace the main mentor in case of absence and is able to fill in the reports required during the program. Add only the names and the e-mails here. Mark the main mentor in bold. The main mentor is responsible for filling in the evaluation forms and communicating with the admins. + +* You should give useful links so that interested candidates can better understand your project. 
diff --git a/_gsocproposals/2026/proposal_CMS1.md b/_gsocproposals/2026/proposal_CMS1.md new file mode 100644 index 00000000..db23d1c0 --- /dev/null +++ b/_gsocproposals/2026/proposal_CMS1.md @@ -0,0 +1,48 @@ +--- +title: Event Classification With Masked Transformer Autoencoders +layout: gsoc_proposal +project: CMS +year: 2026 +organization: + - Alabama + - NISER +--- + +## Description + +One of the key tasks in particle physics analyses is proper classification of particle collision events based on the parent particles and the process that produced them. To handle this task, we’re developing a flexible machine learning pipeline which can be applied to a broad range of classification tasks. We’ll leverage a mix of older and newer techniques for transformer models like masking, pretraining using autoencoder architectures, and cross attention of task-specific attention heads. + +## Duration + +Total project length: 175/350 hours. + +## Task ideas + * Develop a scalable transformer encoder model with task-specific attention heads combined using a cross attention mechanism + * Improve existing code pipeline with features like multi-GPU parallelism and flexible preprocessing and analysis options + * Deploy the developed models and pipeline on simulated physics data and analyze performance gains and changes in model understanding from the techniques used + + + +## Requirements +Significant experience in Python and Machine Learning in Pytorch. Preferably some experience with Transformers and multi-GPU parallelization or with the ROOT library developed by CERN. 
+ +## Difficulty Level +Advanced + +## Mentors + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) + * [Ruchi Chudasama](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + + +## Links + * [Blog Post 1](https://medium.com/@eric0reinhardt/gsoc-2023-with-ml4sci-reconstruction-and-classification-of-particle-collisions-with-masked-bab8b38958df) + * [Paper 1](https://arxiv.org/abs/2401.00452) diff --git a/_gsocproposals/2026/proposal_CMS2.md b/_gsocproposals/2026/proposal_CMS2.md new file mode 100644 index 00000000..0f8c2e45 --- /dev/null +++ b/_gsocproposals/2026/proposal_CMS2.md @@ -0,0 +1,44 @@ +--- +title: Super resolution at the CMS detector +layout: gsoc_proposal +project: CMS +year: 2026 +organization: + - Alabama + +--- + +## Description + +One of the important aspects of searches for new physics at the Large Hadron Collider (LHC) involves the identification and reconstruction of single particles, jets and event topologies of interest in collision events. In order to correctly reconstruct particles of interest, high resolution is required. + +This project will focus on developing machine learning models to map processed, lower resolution data from simulated particle collisions back to a higher resolution representation. + +## Duration + +Total project length: 175/350 hours. 
+ +## Task ideas + * Develop a machine-learning super-resolution model to upsample particle collision data + * Analyze performance on ground-truth simulated higher resolution data + +## Expected results + * Trained models and benchmarks on simulated data + + + +## Requirements +Python, C++, and some previous experience in Machine Learning. + +## Mentors + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + + +## Links + * [Paper 1](https://arxiv.org/pdf/2409.16052) + * [Paper 2](https://ml4physicalsciences.github.io/2024/files/NeurIPS_ML4PS_2024_89.pdf) diff --git a/_gsocproposals/2026/proposal_DEEPLENSE1.md b/_gsocproposals/2026/proposal_DEEPLENSE1.md new file mode 100644 index 00000000..c9075061 --- /dev/null +++ b/_gsocproposals/2026/proposal_DEEPLENSE1.md @@ -0,0 +1,60 @@ +--- +title: Foundation Model for Gravitational Lensing +layout: gsoc_proposal +project: DEEPLENSE +project size: 350hr +year: 2026 +organization: + - Alabama + - MIT + - Florida + - PSL +--- + +## Description + +Strong gravitational lensing is a powerful tool for studying dark matter and the large-scale structure of the universe. This project focuses on developing a vision foundation model specifically designed for lensing data, which can be fine-tuned for a variety of downstream tasks, including classification, super-resolution, regression, and lens finding. + +This project will explore different training strategies such as self-supervised learning, contrastive learning, or transformer-based models to learn meaningful representations of lensing images. By leveraging diverse datasets and training methodologies, the model will serve as a general-purpose backbone that can adapt to different astrophysical tasks while improving generalization across various observational conditions. 
+ +## Duration + +Total project length: 350 hours. + +## Difficulty level + +Advanced + +## Task ideas + * Develop a pre-training strategy for learning robust representations of gravitational lensing data. + * Fine-tune the foundation model for multiple tasks such as classification, super-resolution, and regression. + * Evaluate the model's performance on different astrophysical datasets and benchmark against traditional methods. + +## Expected results + * A vision foundation model for gravitational lensing capable of being fine-tuned for various astrophysical tasks. + * Improved generalization and adaptability across different lensing datasets and observational setups. + +## Requirements + * Python, PyTorch, experience with machine learning, and familiarity with computer vision techniques. + * Understanding of self-supervised learning, representation learning, and deep learning architectures. + + + +## Mentors + * [Michael Toomey](mailto:ml4-sci@cern.ch) (Massachusetts Institute of Technology) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Pranath Reddy](mailto:ml4-sci@cern.ch) (University of Florida) + * [Anna Parul](mailto:ml4-sci@cern.ch) (Observatoire de Paris) + + +Please DO NOT contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and include **your CV** and **test results**. The relevant mentors will then get in touch with you. 
+ + +## Links + * [Paper 1](https://arxiv.org/abs/2008.12731) + * [Paper 2](https://arxiv.org/abs/1909.07346) + * [Paper 3](https://arxiv.org/abs/2112.12121) + diff --git a/_gsocproposals/2026/proposal_DEEPLENSE2.md b/_gsocproposals/2026/proposal_DEEPLENSE2.md new file mode 100644 index 00000000..3e8162e8 --- /dev/null +++ b/_gsocproposals/2026/proposal_DEEPLENSE2.md @@ -0,0 +1,50 @@ +--- +title: Unsupervised Super-Resolution and Analysis of Real Lensing Images +layout: gsoc_proposal +project: DEEPLENSE +project size: 175hr/350hr +year: 2026 +organization: + - Alabama + - MIT + - Florida +--- + +## Description +This project’s aims are twofold: developing an unsupervised super-resolution architecture to upscale the quality of lensing images constructed using real galaxy sources, and obtaining insight into the lenses themselves. An unsupervised super-resolution technique could be very valuable for lensing studies as access to high resolution lensing images for training and study can be limited, especially given potential lensing data from upcoming surveys such as Euclid and LSST. The overall goal of this project is to develop an architecture that can better study the characteristics of the gravitational lenses and their substructure. + +## Duration +Total project length: 175/350 hours. + +## Difficulty level +Intermediate/Advanced + +## Task ideas + * Start with unsupervised SR of simulated images and think of ways to bridge the gap to real images. + * Then integrate modules that study the characteristics of the lenses into the pipeline. + +## Expected results + * A more capable architecture that can operate on a wider variety of lensing images, including lensing images created with real galaxy datasets. + * Insight into the lensing systems, and their sub-structures. + +## Requirements +Python, PyTorch, experience with machine learning, knowledge of computer vision techniques, familiarity with autoencoders. 
+ + + +## Mentors + * [Michael Toomey](mailto:ml4-sci@cern.ch) (Massachusetts Institute of Technology) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Pranath Reddy](mailto:ml4-sci@cern.ch) (University of Florida) + +Please DO NOT contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and include **your CV** and **test results**. The relevant mentors will then get in touch with you. + + +## Links + * [Paper 1](https://arxiv.org/abs/2008.12731) + * [Paper 2](https://arxiv.org/abs/1909.07346) + * [Paper 3](https://iopscience.iop.org/article/10.1088/2632-2153/ad76f8/meta) + diff --git a/_gsocproposals/2026/proposal_DEEPLENSE3.md b/_gsocproposals/2026/proposal_DEEPLENSE3.md new file mode 100644 index 00000000..07063b9a --- /dev/null +++ b/_gsocproposals/2026/proposal_DEEPLENSE3.md @@ -0,0 +1,51 @@ +--- +title: Physics Guided Machine Learning on Real Lensing Images +layout: gsoc_proposal +project: DEEPLENSE +project size: 175hr/350hr +year: 2026 +organization: + - Alabama + - MIT +--- + +## Description + +This project focuses on developing a Physics-Informed Neural Network (PINN) framework for analyzing real strong gravitational lensing datasets to study dark matter distribution. Strong gravitational lensing, a key prediction of general relativity, occurs when a massive galaxy or cluster bends light from a background source, creating arcs or Einstein rings. Traditional algorithms struggle or fail entirely when applied to real lensing datasets due to observational complexities and noise. By leveraging PINNs, the project will integrate physical laws directly into the learning process, enhancing the accuracy and interpretability of dark matter inferences. The model will be trained on real lensing images, incorporating observational constraints to refine mass distribution estimates and improve the efficiency of dark matter studies. + +## Duration + +Total project length: 175/350 hours. 
+ +## Difficulty level + +Intermediate/Advanced + +## Task ideas + * Build various physics-informed neural network architectures that are endowed with known physics for real lensing datasets. + * Apply these models to study dark matter in strong lensing images in various contexts: classification, regression, anomaly detection, and more. + +## Expected results + * A more capable architecture that can operate on a wider variety of lensing images, including lensing images created with real galaxy datasets. + * Insight into the lensing systems, and their sub-structures. + +## Requirements +Python, PyTorch, experience with machine learning, knowledge of computer vision techniques, familiarity with autoencoders. + + + +## Mentors + * [Michael Toomey](mailto:ml4-sci@cern.ch) (Massachusetts Institute of Technology) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + + +Please DO NOT contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and include **your CV** and **test results**. The relevant mentors will then get in touch with you. + + +## Links + * [Paper 1](https://arxiv.org/abs/2008.12731) + * [Paper 2](https://arxiv.org/abs/1909.07346) + * [Paper 3](https://ml4physicalsciences.github.io/2024/files/NeurIPS_ML4PS_2024_78.pdf) diff --git a/_gsocproposals/2026/proposal_DEEPLENSE4.md b/_gsocproposals/2026/proposal_DEEPLENSE4.md new file mode 100644 index 00000000..5638ecf3 --- /dev/null +++ b/_gsocproposals/2026/proposal_DEEPLENSE4.md @@ -0,0 +1,54 @@ +--- +title: Gravitational Lens Finding +layout: gsoc_proposal +project: DEEPLENSE +project size: 175hr/350hr +year: 2026 +organization: + - Alabama + - MIT + - PSL +--- + +## Description + +This project focuses on the task of lens finding in the currently available wide-field surveys (e.g., HSC-SSP). 
In large surveys, the expected number of strong lenses is dwarfed by the number of non-lensed objects, which leads to a high number of false positives in typical lens searches. + +The goal of the project is to develop lens finding algorithms, apply them to the observational data, and assess the limitations of the algorithms (for example, analyse the properties of the identified lens population and examine the typical contaminants). + +## Duration + +Total project length: 175/350 hours. + +## Difficulty level +Intermediate/Advanced + +## Task ideas + * Start with architectures previously explored within the DeepLense project and optimise them for the lens finding task. + * Perform the lens search in the real observational data and analyse the properties of the detected lens candidates. + * Evaluate model performance on different surveys. + +## Expected results + * Increase the number of known strong lenses. + * Insight into properties of the identified lens candidates. + +## Requirements +Python, PyTorch, experience with machine learning, familiarity with astrophysics datasets. + + + +## Mentors + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Michael Toomey](mailto:ml4-sci@cern.ch) (Massachusetts Institute of Technology) + * [Anna Parul](mailto:ml4-sci@cern.ch) (Observatoire de Paris) + + +Please DO NOT contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and include **your CV** and **test results**. The relevant mentors will then get in touch with you. 
+ + +## Links + * [Paper 1](https://arxiv.org/abs/1909.07346) + * [Paper 2](https://ml4physicalsciences.github.io/2024/files/NeurIPS_ML4PS_2024_107.pdf) diff --git a/_gsocproposals/2026/proposal_DEEPLENSE5.md b/_gsocproposals/2026/proposal_DEEPLENSE5.md new file mode 100644 index 00000000..f0e4e218 --- /dev/null +++ b/_gsocproposals/2026/proposal_DEEPLENSE5.md @@ -0,0 +1,55 @@ +--- +title: Diffusion Models for Gravitational Lensing Simulation +layout: gsoc_proposal +project: DEEPLENSE +project size: 175hr/350hr +year: 2026 +organization: + - Alabama + - MIT + - Florida +--- + +## Description + +Strong gravitational lensing is a promising probe of the substructure of dark matter to better understand its underlying nature. Deep learning methods have the potential to accurately identify images containing substructure and differentiate WIMP particle dark matter from other well-motivated models, including axions and axion-like particles, warm dark matter, etc. + +Traditional simulations of gravitational lensing are time-consuming and require extensive computational resources. This project proposes the use of diffusion models, a class of generative models known for their ability to produce high-quality, detailed images from a distribution of noise, to simulate strong gravitational lensing images. We aim to generate realistic simulations of gravitational lensing events that can be used to augment datasets for machine learning models and facilitate the development of better domain adaptation and self-supervised models aimed at bridging the gap between simulated and real images of gravitational lensing. Furthermore, we will also investigate leveraging conditional diffusion models to generate gravitational lensing simulations by conditioning the model on specific parameters related to the lensing events, such as the mass distribution of the lensing galaxy, orientation, and the redshift of both the source and the lens. + +## Duration + +Total project length: 175/350 hours. 
+ +## Difficulty level + +Intermediate/Advanced + +## Task ideas + * Explore diffusion models for the generation of strong gravitational lensing images. + * Create a diverse dataset of simulated gravitational lensing images under various astrophysical conditions. + +## Expected results + * A diffusion model capable of generating realistic simulations of strong gravitational lensing phenomena. + +## Requirements + * Python, PyTorch and relevant past experience in Machine Learning. + * Familiarity with astrophysics and gravitational lensing is preferred but not required. + + + +## Mentors + * [Michael Toomey](mailto:ml4-sci@cern.ch) (Massachusetts Institute of Technology) + * [Pranath Reddy](mailto:ml4-sci@cern.ch) (University of Florida) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + +Please DO NOT contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and include **your CV** and **test results**. The relevant mentors will then get in touch with you. + + +## Links + * [Paper 1](https://arxiv.org/abs/2008.12731) + * [Paper 2](https://arxiv.org/abs/1909.07346) + * [Paper 3](https://arxiv.org/abs/2112.12121) diff --git a/_gsocproposals/2026/proposal_DEEPLENSE6.md b/_gsocproposals/2026/proposal_DEEPLENSE6.md new file mode 100644 index 00000000..fb1f0107 --- /dev/null +++ b/_gsocproposals/2026/proposal_DEEPLENSE6.md @@ -0,0 +1,53 @@ +--- +title: Data Processing Pipeline for the LSST +layout: gsoc_proposal +project: DEEPLENSE +project size: 175hr/350hr +year: 2026 +organization: + - Alabama + - Brown + - MIT + - PSL +--- + +## Description + +The Rubin Observatory will provide an unprecedented volume of astronomical data, accessible via a dedicated open-source software suite consisting of data reduction pipelines and tools for interacting with calibrated images and catalogs. 
In order to prepare for the application of DeepLense methods on upcoming LSST data, this project focuses on developing a complementary pipeline that integrates LSST’s data access tools with DeepLense workflows. This pipeline will enable efficient data retrieval, preprocessing, and adaptation for various DeepLense applications such as lens finding, super-resolution, and classification. + +## Duration + +Total project length: 175/350 hours. + +## Difficulty level + +Intermediate/Advanced + +## Task ideas + * Explore the existing LSST data access tools and design the workflow to provide the data for the DeepLense tasks. + * Test the workflow on the mock surveys provided by The Rubin Observatory. + +## Expected results + * A functional pipeline capable of interfacing LSST data with DeepLense applications. + +## Requirements +Python, familiarity with astronomical data processing, and understanding of data access APIs and pipelines. + + + +## Mentors + * [Michael Toomey](mailto:ml4-sci@cern.ch) (Massachusetts Institute of Technology) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Anna Parul](mailto:ml4-sci@cern.ch) (Observatoire de Paris) + * [Lucca Paris](mailto:ml4-sci@cern.ch) (Brown University) + +Please DO NOT contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and include **your CV** and **test results**. The relevant mentors will then get in touch with you. 
+ + +## Links + * [LSST Pipeline Docs](https://pipelines.lsst.io) + * [Paper 1](https://arxiv.org/abs/2008.12731) + * [Paper 2](https://arxiv.org/abs/1909.07346) diff --git a/_gsocproposals/2026/proposal_E2E2.md b/_gsocproposals/2026/proposal_E2E2.md new file mode 100644 index 00000000..9318dcc0 --- /dev/null +++ b/_gsocproposals/2026/proposal_E2E2.md @@ -0,0 +1,55 @@ +--- +title: End-to-End event classification with sparse autoencoders +layout: gsoc_proposal +project: E2E +year: 2026 +organization: + - Alabama + - NISER + +--- + +## Description + +One of the key tasks in particle physics analyses is proper classification of particle collision events based on the parent particles and the process that produced them. To handle this task, we’re developing a flexible machine learning pipeline which can be applied to a broad range of classification tasks. This project will primarily explore the development of sparse autoencoders which can effectively handle particle collision information represented as minimally processed images where the majority of the pixels in the image have very low or zero value. Different techniques, such as sparse convolutions and point-cloud structures, have been developed to handle sparse representations. + + + +## Duration + +Total project length: 175/350 hours. + +## Task ideas +* Develop a scalable sparse autoencoder model pipeline for event classification and reconstruction. + +## Expected results +* Improve existing code pipeline with features like multi-GPU parallelism and flexible preprocessing and analysis options. +* Deploy the developed models and pipeline on simulated physics data and analyze performance gains and changes in model understanding from the techniques used. + + +## Difficulty level +Advanced + +## Requirements +* Significant experience in Python and Machine Learning in PyTorch. +* Preferably some experience with Transformers and multi-GPU parallelization or with the ROOT library developed by CERN. 
+ + + +## Mentors + * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Ruchi Chudasama](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + +## Links + * [Paper 1](https://arxiv.org/pdf/1706.01307) + * [Paper 2](https://arxiv.org/abs/1712.07262) + * [Paper 3](https://arxiv.org/pdf/1902.08276) diff --git a/_gsocproposals/2026/proposal_E2E3.md b/_gsocproposals/2026/proposal_E2E3.md new file mode 100644 index 00000000..47cd07db --- /dev/null +++ b/_gsocproposals/2026/proposal_E2E3.md @@ -0,0 +1,59 @@ +--- +title: Diffusion models for fast and accurate simulations of low level CMS experiment data. +layout: gsoc_proposal +project: E2E +year: 2026 +organization: + - Alabama + - NISER + +--- + +## Description + +One of the important aspects of searches for new physics at the Large Hadron Collider (LHC) involves the identification and reconstruction of single particles, jets and event topologies of interest in collision events. The End-to-End Deep Learning (E2E) project in the CMS experiment focuses on the development of these reconstruction and identification tasks with innovative deep learning approaches. + +Diffusion based generative models are strong candidates for Fast Simulation models. The idea of this project is to build a diffusion-based ML model to model the underlying structure of the data which can be used for generating novel samples from the given distribution. Moreover this project also aims to explore conditional diffusion models that can generate specific types of data given a certain input to the model. 
+ + +## Duration + +Total project length: 175/350 hours. + +## Task ideas + * Experiment with different Diffusion based models to find the best one suited for the E2E case. + * Implementation of conditional diffusion models to generate samples based on specific given properties of the jets. + + + +## Expected results + * Implementation of a conditional diffusion model capable of generating realistic samples resembling the CMS E2E data. + + + +## Difficulty level +Intermediate + +## Requirements +Python, PyTorch and some previous experience in Machine Learning. + + + +## Mentors + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Ruchi Chudasama](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Bhim Bam](mailto:ml4-sci@cern.ch) (University of Alabama) + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
+ +## Links + * [Paper 1](https://arxiv.org/abs/2302.00236) + * [Paper 2](https://arxiv.org/abs/2104.09459) + diff --git a/_gsocproposals/2026/proposal_E2E4.md b/_gsocproposals/2026/proposal_E2E4.md new file mode 100644 index 00000000..a8bdb9f9 --- /dev/null +++ b/_gsocproposals/2026/proposal_E2E4.md @@ -0,0 +1,54 @@ +--- +title: Deep Learning Inference for mass regression +layout: gsoc_proposal +project: E2E +year: 2026 +organization: + - Alabama + - New York University + - Vishwakarma Institute of Technology +--- + +## Description + +One of the important aspects of searches for new physics at the [Large Hadron Collider (LHC)](https://home.cern/science/accelerators/large-hadron-collider) involves the identification and reconstruction of single particles, jets and event topologies of interest in collision events. The End-to-End Deep Learning (E2E) project in the CMS experiment focuses on the development of these reconstruction and identification tasks with innovative deep learning approaches. + +One of the main objectives of the CMS experiment's research and development towards the high-luminosity LHC is to incorporate cutting-edge machine learning algorithms for particle reconstruction and identification into the CMS software framework (CMSSW) data processing pipeline. This project will focus on the integration of the E2E framework with the [CMSSW](https://github.com/cms-sw/cmssw) inference engine for use in reconstruction algorithms in offline and high-level trigger systems of the [CMS](https://home.cern/science/experiments/cms) experiment. + +## Duration + +Total project length: 175/350 hours. 
+ +## Difficulty level +Intermediate + +## Task ideas + * Development of end-to-end deep learning regression for particle property measurements + * Test and integration into CMSSW + + +## Expected results + * Extension of currently integrated E2E CMSSW prototype to include the regression model inference + + +## Requirements +C++, Python, PyTorch and some previous experience in Machine Learning. + + + +## Mentors + * [Ruchi Chudasama](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Shravan Chaudhari](mailto:ml4-sci@cern.ch) (New York University) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Purva Chaudhari](mailto:ml4-sci@cern.ch) (Vishwakarma Institute of Technology) + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + + + +## Links + * [Paper 1](https://arxiv.org/abs/2309.14254) diff --git a/_gsocproposals/2026/proposal_E2E5.md b/_gsocproposals/2026/proposal_E2E5.md new file mode 100644 index 00000000..a68a585a --- /dev/null +++ b/_gsocproposals/2026/proposal_E2E5.md @@ -0,0 +1,65 @@ +--- +title: Next generation vision transformers for end to end mass regression and classification +layout: gsoc_proposal +project: E2E +year: 2026 +organization: + - Alabama + - New York University + - Vishwakarma Institute of Technology + - NISER +--- + + +## Description +One of the important aspects of searches for new physics at the [Large Hadron Collider (LHC)](https://home.cern/science/accelerators/large-hadron-collider) involves the identification and reconstruction of single particles, jets and event topologies of interest in collision events. The End-to-End Deep Learning (E2E) project in the CMS experiment focuses on the development of these reconstruction and identification tasks with innovative deep learning approaches. 
+ + +A minimal representation of particle collision data is as an image representation of particle hits in different layers of the detector. This project will explore development of vision transformers incorporating the latest knowledge in the field of computer vision to classify particle collision images by the type of heavy particles generated in the collision and reconstruct the mass of those particles. + + +## Duration +Total project length: 175/350 hours. + + +## Difficulty level +Intermediate + + +## Task ideas + * Development of vision transformer models for end-to-end classification and regression + + +## Expected results + * Trained vision transformer models incorporating the latest computer vision techniques + * Benchmarks against baseline vision transformers and comparison of different vision transformer types + + + + +## Requirements +C++, Python, PyTorch and some previous experience in Machine Learning. + + + + + +## Mentors + * [Ruchi Chudasama](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Shravan Chaudhari](mailto:ml4-sci@cern.ch) (New York University) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Purva Chaudhari](mailto:ml4-sci@cern.ch) (Vishwakarma Institute of Technology) + * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) + + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
+ + + + +## Links + * [Paper 1](https://arxiv.org/abs/2309.14254) diff --git a/_gsocproposals/2026/proposal_E2E6.md b/_gsocproposals/2026/proposal_E2E6.md new file mode 100644 index 00000000..235acaf8 --- /dev/null +++ b/_gsocproposals/2026/proposal_E2E6.md @@ -0,0 +1,66 @@ +--- +title: End-to-End particle collision track reconstruction +layout: gsoc_proposal +project: E2E +year: 2026 +organization: + - Alabama + - NISER + - New York University + - Vishwakarma Institute of Technology +--- + + +## Description + + +One of the important aspects of searches for new physics at the [Large Hadron Collider (LHC)](https://home.cern/science/accelerators/large-hadron-collider) involves the identification and reconstruction of single particles, jets and event topologies of interest in collision events. The End-to-End Deep Learning (E2E) project in the CMS experiment focuses on the development of these reconstruction and identification tasks with innovative deep learning approaches. + + +One potential approach for particle reconstruction is to take minimally processed detector hit information from jets of decayed particles and rebuild the tracks that the originating particles followed and derive further quantities from those tracks. This project will focus on using machine learning models to achieve this reconstruction. + + +## Duration + + +Total project length: 175/350 hours. + + +## Difficulty level +Intermediate + + +## Task ideas + * Development of end-to-end deep learning track reconstruction algorithm + * Test and integration into CMSSW + + +## Expected results + * Trained track reconstruction algorithm and benchmarks against ground truth in simulated data + + + + +## Requirements +C++, Python, PyTorch and some previous experience in Machine Learning. 
+ + + + + +## Mentors + * [Ruchi Chudasama](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Shravan Chaudhari](mailto:ml4-sci@cern.ch) (New York University) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Purva Chaudhari](mailto:ml4-sci@cern.ch) (Vishwakarma Institute of Technology) + * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + + +## Links + * [Paper 1](https://arxiv.org/abs/2309.14254) diff --git a/_gsocproposals/2026/proposal_E2E7.md b/_gsocproposals/2026/proposal_E2E7.md new file mode 100644 index 00000000..eff6b415 --- /dev/null +++ b/_gsocproposals/2026/proposal_E2E7.md @@ -0,0 +1,67 @@ +--- +title: Foundation models for End-to-End event reconstruction +layout: gsoc_proposal +project: E2E +year: 2026 +organization: + - Alabama + - NISER + - New York University + - Vishwakarma Institute of Technology +--- + + +## Description + + +One of the important aspects of searches for new physics at the [Large Hadron Collider (LHC)](https://home.cern/science/accelerators/large-hadron-collider) involves the identification and reconstruction of single particles, jets and event topologies of interest in collision events. The End-to-End Deep Learning (E2E) project in the CMS experiment focuses on the development of these reconstruction and identification tasks with innovative deep learning approaches. + + +This project will focus on the development of foundation models for end-to-end particle reconstruction with the goal of performing generative, classification and regression tasks. +## Duration + + +Total project length: 175/350 hours. + + +## Difficulty level +Advanced + + +## Task ideas + * Develop a pre-training strategy for learning robust representations of high-energy physics detector data. 
+* Fine-tune the foundation model for multiple tasks such as classification, generation, super-resolution, and regression. +* Evaluate the model’s performance on different HEP datasets and benchmark against individual methods for classification, regression, etc. + + + + +## Expected results + * Trained foundation model and benchmarks +## Requirements +C++, Python, PyTorch and some previous experience in Machine Learning. + + + + + +## Mentors + * [Ruchi Chudasama](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Shravan Chaudhari](mailto:ml4-sci@cern.ch) (New York University) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Purva Chaudhari](mailto:ml4-sci@cern.ch) (Vishwakarma Institute of Technology) + * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + + + + + + +## Links + * [Paper 1](https://arxiv.org/abs/2309.14254) diff --git a/_gsocproposals/2026/proposal_E2E8.md b/_gsocproposals/2026/proposal_E2E8.md new file mode 100644 index 00000000..187cf749 --- /dev/null +++ b/_gsocproposals/2026/proposal_E2E8.md @@ -0,0 +1,62 @@ +--- +title: Discovery of hidden symmetries and conservation laws +layout: gsoc_proposal +project: E2E +year: 2026 +organization: + - Alabama + - NISER + +--- + +## Description +Recent success in the domain of unsupervised and semi-supervised learning has been a pivotal factor in the development of Physics Aware and Symmetry Aware Machine Learning techniques, where a model learns the symmetry of a dataset as a meta task and ends up learning the physics through the same. + +Most of the symmetries that we work with for SM physics are well defined and formulated, and they can be well interpreted in the 4-vector or 4-momenta basis. 
With change of representation the symmetries become elusive and difficult to write and work with. This calls for machine learning techniques that can learn the representation of the given symmetry through the means of a conserved quantity for a given abstract representation space. + +Learning these symmetries not only makes us more prepared to deal with the physics constraints in these abstract spaces and coordinates but also makes us able to build neural networks that are invariant to these symmetries. Such neural networks as seen from the existing literature are more robust, stable, interpretable and data efficient. + +This project will focus on ways to learn hidden symmetries combining the works of +* https://arxiv.org/abs/2109.09721 +* https://arxiv.org/pdf/2301.05638v1 +* https://arxiv.org/abs/2302.00236 + +## Duration +Total project length: 175/350 hours. + +## Task ideas + * Develop a deep learning model capable of uncovering the symmetries present in the toy datasets and then extending it to more abstract use cases. + * Using the symmetries discovered to probe the phase space of the CMS datasets. + * Building physics aware models using the symmetries. + +## Expected results + * Discover symmetries and conserved quantities present in the CMS dataset. + * Benchmark the models with other previous works in terms of data efficiency and invariance with respect to symmetry operations. 
+ +## Difficulty level +Advanced + +## Requirements +* Proficiency in C++, Python +* Experience with PyTorch and TensorFlow +* Previous experience in Deep Learning + + + +## Mentors + * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Ruchi Chudasama](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Samuel Campbell](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Alex Roman](mailto:ml4-sci@cern.ch) (University of Alabama) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + +## Links + * [Paper 1](https://arxiv.org/abs/2109.09721) + * [Paper 2](https://arxiv.org/pdf/2301.05638v1) + * [Paper 3](https://arxiv.org/abs/2302.00236) diff --git a/_gsocproposals/2026/proposal_EXXA1.md b/_gsocproposals/2026/proposal_EXXA1.md new file mode 100644 index 00000000..ce08420f --- /dev/null +++ b/_gsocproposals/2026/proposal_EXXA1.md @@ -0,0 +1,54 @@ +--- +title: Equivariant Vision Networks for Predicting Planetary Systems' Architectures +layout: gsoc_proposal +project: EXXA +year: 2026 +organization: + - University of Alabama + - Oxford University +--- + +#### Description + +The architecture of planetary systems, including the number of planets and their orbital configurations, provides crucial insights into their formation and evolution. This project aims to leverage the capabilities of equivariant computer vision networks to predict the number of planets in observed systems from astronomical data. Equivariant networks, due to their ability to handle rotational and reflectional symmetries inherent in astronomical images, offer a promising approach for analyzing spatial data without loss of predictive accuracy due to orientation changes. 
By regressing on the number of planets, this project seeks to develop a robust model that can adapt to the complexities of observational data, including direct images, transit data, and radial velocity measurements. + +#### Duration + +Total project length: 175/350 hours. + +#### Task Ideas + + - Review and implement state-of-the-art equivariant neural network architectures suitable for astronomical data analysis. + - Curate a dataset from existing astronomical surveys, including labeled systems with known numbers of planets, for training and testing the model. + - Train the equivariant network on the curated dataset, optimizing for accurate regression on the number of planets in a system. + - Evaluate the model's performance using a separate test set, focusing on its ability to generalize across different types of planetary systems and observational techniques. + - Explore the integration of additional data modalities (e.g., spectroscopic data) to improve the model's predictive capabilities. + +#### Expected Results + + - A highly accurate equivariant computer vision model capable of regressing on the number of planets in observed systems, accounting for the complexities and variabilities in astronomical data. + - A comprehensive evaluation of the model's performance, highlighting its strengths and potential areas for improvement. + - Documentation and guidelines for applying the model to new datasets, facilitating further research and potential real-world applications in exoplanet discovery and characterization. + +## Requirements + +* Python, PyTorch, C/Fortran +* Background in astronomy is a bonus but not a requirement + +## Test +Use [this link](https://docs.google.com/document/d/1t2cSxEx3vIa6uirfkMkF92rWZM4tTJ_V-lkpYSdukVQ/edit?usp=sharing) for instructions on completing the test. 
+ +## Links + +* [Previous Paper 1](https://iopscience.iop.org/article/10.3847/1538-4357/aca477) +* [Previous Paper 2](https://iopscience.iop.org/article/10.3847/1538-4357/acc737) +* [Previous Paper 3](https://nips.cc/virtual/2023/76224) + +## Mentors + +* [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Jason Terry](mailto:jpterry@uga.edu) (Oxford University) +* [Emilie Panek](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Alex Roman](mailto:ml4-sci@cern.ch) (University of Alabama) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_EXXA2.md b/_gsocproposals/2026/proposal_EXXA2.md new file mode 100644 index 00000000..ed1b3a02 --- /dev/null +++ b/_gsocproposals/2026/proposal_EXXA2.md @@ -0,0 +1,54 @@ +--- +title: Denoising Astronomical Observations of Protoplanetary Disks +layout: gsoc_proposal +project: EXXA +year: 2026 +organization: + - University of Alabama + - Oxford University +--- + +#### Description + +Recent advancements in observational astronomy have given the field the ability to resolve protoplanetary disks, the sites of planet formation, in unprecedented detail. Array telescopes, such as ALMA and VLT, produce data that have revolutionized the study of these environments, spurring a rapid increase in the number of observations, significant advancements in theoretical understandings of planet formation processes, and the need for more efficient and accurate data processing. Traditional data processing algorithms, while advanced and powerful, are often time-consuming, computationally expensive, and can still produce noisy results. State-of-the-art machine learning algorithms, such as diffusion networks, are well-suited to this task and are a prime candidate for implementation in the field of protoplanetary disk astronomy.
The purpose of this project is to develop machine learning algorithms to create a pipeline that denoises observational data more quickly and to a greater extent than current methods. + +#### Duration + +Total project length: 175/350 hours. + +#### Task Ideas + + - Use synthetic observations of protoplanetary disks created using hydrodynamic simulations and radiative transfer to train machine learning models capable of denoising observational data. + - Investigate and select suitable machine learning denoising models that can handle the complexity and heterogeneity of the data. + - Develop a training pipeline that includes data augmentation techniques to enrich the training dataset and improve model robustness. + - Implement the model and train it on the prepared dataset, optimizing for the ability to reproduce the raw synthetic observations. + - Generalize the model to other types of observations, including line emission data and observations from other telescopes. + - Validate the model's performance on real observational data from ALMA and VLT, comparing the performance to traditional methods. + +#### Expected Results + + - A machine learning denoising model tailored for removing noise from astronomical observations, leveraging the unique characteristics of observational data. + - A detailed analysis of the model's performance in removing noise from the data, including comparisons to traditional data processing methods and real observational data. + - A publicly available dataset curated for training and testing the model, accompanied by a comprehensive data preprocessing and augmentation pipeline. + - Documentation outlining the model architecture, training process, and guidelines for application to new datasets, ensuring reproducibility and facilitating future research in the field. 
+ + +## Requirements + +* Python, PyTorch, C/Fortran +* Background in astronomy is a bonus but not a requirement + +## Test +Use [this link](https://docs.google.com/document/d/1t2cSxEx3vIa6uirfkMkF92rWZM4tTJ_V-lkpYSdukVQ/edit?usp=sharing) for instructions on completing the test. + +## Links + +* [Previous Paper 1](https://iopscience.iop.org/article/10.3847/1538-4357/aca477) +* [Previous Paper 2](https://iopscience.iop.org/article/10.3847/1538-4357/acc737) + +## Mentors + +* [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Jason Terry](mailto:jpterry@uga.edu) (Oxford University) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_EXXA3.md b/_gsocproposals/2026/proposal_EXXA3.md new file mode 100644 index 00000000..8e79fb41 --- /dev/null +++ b/_gsocproposals/2026/proposal_EXXA3.md @@ -0,0 +1,50 @@ +--- +title: Exoplanet Atmosphere Characterization +layout: gsoc_proposal +project: EXXA +year: 2026 +organization: + - University of Alabama + - Oxford University +--- + +#### Description + +The characterization of exoplanet atmospheres is crucial for understanding their compositions, weather patterns, and potential habitability. This project aims to develop machine learning models to analyze spectral data from exoplanets, identifying chemical abundances, cloud/haze structure and different atmospheric processes. The project will leverage data from telescopes and space missions, along with simulations of exoplanetary atmospheres under various conditions, to train and validate the models. + +#### Duration + +Total project length: 175/350 hours.
+ +#### Task Ideas + + * Perform simulations of exoplanetary atmospheres with diverse atmospheric conditions: non-isothermal atmospheres; chemical equilibrium/disequilibrium; dawn/dusk asymmetry; distinct weather patterns; cloud/haze coverage etc. + * Train machine learning models on simulated spectral data to recognize different atmospheric conditions and physical processes using transmission and/or emission spectroscopy. + * Develop an ML strategy for searching for potential biosignatures in spectroscopic observations. + * Apply the trained models to real observational data from missions like Hubble, JWST, and future telescopes to characterize exoplanet atmospheres. + * Explore the use of deep learning techniques for enhancing the models’ ability to identify subtle spectral signatures associated with different atmospheric processes. + + +#### Expected Results + + * A set of machine learning models capable of accurately characterizing exoplanet atmospheres. + * Analysis of the models’ performance on observational data, demonstrating their applicability to current and future exoplanet studies. + + + +## Requirements + +* Python, PyTorch, C/Fortran +* Background in astronomy is a bonus but not a requirement + +## Test +Use [this link](https://docs.google.com/document/d/1t2cSxEx3vIa6uirfkMkF92rWZM4tTJ_V-lkpYSdukVQ/edit?usp=sharing) for instructions on completing the test. + +## Mentors + +* [Katia Matcheva](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Konstantin Matchev](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Jason Terry](mailto:jpterry@uga.edu) (Oxford University) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV**. The mentors will then get in touch with you.
diff --git a/_gsocproposals/2026/proposal_EXXA4.md b/_gsocproposals/2026/proposal_EXXA4.md new file mode 100644 index 00000000..021f75ff --- /dev/null +++ b/_gsocproposals/2026/proposal_EXXA4.md @@ -0,0 +1,52 @@ +--- +title: Foundation Models for Exoplanet Characterization +layout: gsoc_proposal +project: EXXA +year: 2026 +organization: + - University of Alabama + - Oxford University +--- + +#### Description + +Advancing the understanding of exoplanets and planet formation requires a wide variety of observational methods and data modalities. Planet formation is a complex process that involves the assembly of a planet from a protoplanetary disk, an environment that instruments have only recently been able to resolve. These observations rely mostly on image data, including line emission and continuum data. The analysis of this data is a complex process, but, when done successfully, it opens new avenues for understanding planet formation, the resulting systems of exoplanets, and the potential of these systems for habitability. A complementary route is to use data from the atmospheres of exoplanets. The characterization of exoplanet atmospheres is crucial for understanding their compositions, weather patterns, and potential habitability. This project aims to develop foundation machine learning models that will analyze data of different environments from different instruments to further our understanding of planet formation, exoplanet systems, exoplanet properties, and, ultimately, the potential of these systems for habitability. The models will use image data of disks, spectral data from exoplanets, identifying forming exoplanets, processes and substructures that are important in protoplanetary disk evolution, chemical abundances in exoplanet atmospheres, cloud/haze structure, and different atmospheric processes.
The project will leverage data from telescopes and space missions, along with simulations of protoplanetary disks and exoplanetary atmospheres under various conditions, to train and validate the models. + +#### Duration + +Total project length: 175/350 hours. + +#### Task Ideas + * Assemble a consolidated database using existing protoplanetary disk and exoplanet transit observations from different instruments, spectral resolutions, and spectral ranges from publicly available archives. + * Develop an ML approach to overcome the specific instrumental differences for the different observations. Training can be done on existing synthetic databases simulating the instrument performance (Hubble Space Telescope, JWST, ALMA, Ariel etc.) + * Apply the trained models to real observational data from Hubble, JWST, ALMA, and future telescopes to characterize protoplanetary disks and exoplanet atmospheres. + * Explore the use of different ML architectures for enhancing the models’ ability to identify subtle signatures in the different data modalities associated with important physical properties and processes that may influence the formation and identification of habitable systems. + + + +#### Expected Results + * A set of machine learning models capable of accurately characterizing protoplanetary disks and exoplanet atmospheres using inputs from different observations. + * Analysis of the models’ performance on observational data, demonstrating their applicability to current and future exoplanet studies. + + + + +## Requirements + * Python + * PyTorch or TensorFlow (or similar) + * Background in astronomy is a bonus but not a requirement + + +## Test +Use [this link](https://docs.google.com/document/d/1t2cSxEx3vIa6uirfkMkF92rWZM4tTJ_V-lkpYSdukVQ/edit?usp=sharing) for instructions on completing the test. 
+ +## Mentors + +* [Katia Matcheva](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Konstantin Matchev](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Jason Terry](mailto:jpterry@uga.edu) (Oxford University) +* [Alex Roman](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Emilie Panek](mailto:ml4-sci@cern.ch) (University of Alabama) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_EXXA5.md b/_gsocproposals/2026/proposal_EXXA5.md new file mode 100644 index 00000000..94fd645b --- /dev/null +++ b/_gsocproposals/2026/proposal_EXXA5.md @@ -0,0 +1,53 @@ +--- +title: Quantum Machine Learning for Exoplanet Characterization +layout: gsoc_proposal +project: EXXA +year: 2026 +organization: + - University of Alabama +--- + +#### Note +This project is a collaboration with EXXA and QMLHEP. + +#### Description + +The characterization of exoplanet atmospheres is crucial for understanding their compositions, weather patterns, and potential habitability. This project aims to develop quantum machine learning models to analyze spectral data from exoplanets, identifying chemical abundances, cloud/haze structure and different atmospheric processes. The project will leverage data from telescopes and space missions, along with simulations of exoplanetary atmospheres under various conditions, to train and validate the models. + +#### Duration + +Total project length: 175/350 hours. + +#### Task Ideas + * Identify suitable latent representations of the exoplanet transmission data. + * Develop a quantum machine learning architecture for detecting anomalous exoplanets based on synthetic transmission spectra. + * Develop a quantum generative model for simulating exoplanet transmission spectra. 
+ * Apply the trained models to real observational data from missions like Hubble, JWST, and future telescopes to characterize exoplanet atmospheres. + * Benchmark the performance of the developed quantum machine learning models against their classical counterparts. + + +#### Expected Results + * A set of quantum machine learning models capable of accurately modeling exoplanet atmospheres or flagging anomalous spectra. + * Analysis of the models’ performance on observational data, demonstrating their applicability to current and future exoplanet studies. + + +## Requirements + * Python + * PyTorch or TensorFlow (or similar) + * Some experience with Qiskit or Pennylane is preferred + * Background in astronomy is a bonus but not a requirement + + + +## Test +Use [this link](https://docs.google.com/document/d/1t2cSxEx3vIa6uirfkMkF92rWZM4tTJ_V-lkpYSdukVQ/edit?usp=sharing) for instructions on completing the test. + +## Mentors + +* [Katia Matcheva](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Konstantin Matchev](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Alex Roman](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Emilie Panek](mailto:ml4-sci@cern.ch) (University of Alabama) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV**. The mentors will then get in touch with you. 
diff --git a/_gsocproposals/2026/proposal_FALCON1.md b/_gsocproposals/2026/proposal_FALCON1.md new file mode 100644 index 00000000..50191916 --- /dev/null +++ b/_gsocproposals/2026/proposal_FALCON1.md @@ -0,0 +1,46 @@ +--- +title: Diffusion Models for Fast Detector Simulation +layout: gsoc_proposal +project: DeepFALCON +year: 2026 +organization: +- Davidson +- FSU +- EPFL +- CERN +- Middle East Technical University +- Princess Sumaya University for Technology +--- + +## Description +[DeepFalcon](https://ml4physicalsciences.github.io/2020/files/NeurIPS_ML4PS_2020_138.pdf) is a generative ultra-fast non-parametric detector simulation package. The goal of this project is to extend DeepFalcon to include diffusion models to improve simulation of calorimeter and tracker hits from particle interaction with the detectors + +## Task ideas and expected results + * Implementation of diffusion networks into the falcon training and inference. + +## Duration + +Total project length: 175/350 hours. + + + + +## Requirements +Strong machine learning skills, good knowledge of C++ and Python. Interest in Machine Learning algorithms and applications. + +## Mentors + * [Harrison Prosper](mailto:ml4-sci@cern.ch) (Florida State University) + * [Michelle Kuchera](mailto:ml4-sci@cern.ch) (Davidson College) + * [Ali Hariri](mailto:ml4-sci@cern.ch) (EPFL) + * [Sinan Gençoğlu](mailto:ml4-sci@cern.ch) (Middle East Technical University) + * [Amal Saif](mailto:ml4-sci@cern.ch) (Princess Sumaya University for Technology) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
+ +## Links + * [Paper 1](https://arxiv.org/abs/2104.01725) + * [Paper 2](https://ml4physicalsciences.github.io/2020/files/NeurIPS_ML4PS_2020_138.pdf) + * [Paper 3](https://arxiv.org/abs/1605.02684) diff --git a/_gsocproposals/2026/proposal_FALCON2.md b/_gsocproposals/2026/proposal_FALCON2.md new file mode 100644 index 00000000..709cfe55 --- /dev/null +++ b/_gsocproposals/2026/proposal_FALCON2.md @@ -0,0 +1,48 @@ +--- +title: Graph Representation Learning for Fast Detector Simulation +layout: gsoc_proposal +project: DeepFALCON +year: 2026 +organization: + - Alabama + - EPFL + - Middle East Technical University + - Princess Sumaya University for Technology + - CERN + +--- + +## Description +[DeepFalcon](https://ml4physicalsciences.github.io/2020/files/NeurIPS_ML4PS_2020_138.pdf) is a generative ultra-fast non-parametric detector simulation package. The goal of this project is to extend DeepFalcon's current graph VAE to improve simulation of calorimeter and tracker hits from particle interaction with the detectors. + +## Task ideas and expected results + * Extend and scale Graph VAEs to the multi-layer detectors. + * Test various graph connectivities, pooling etc. to optimize the performance. + +## Duration + +Total project length: 175/350 hours. + + + +## Requirements +Strong machine learning skills, good knowledge of C++ and Python. Interest in Machine Learning algorithms and applications. + +## Mentors + * [Ali Hariri](mailto:ml4-sci@cern.ch) (EPFL) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Sinan Gençoğlu](mailto:ml4-sci@cern.ch) (Middle East Technical University) + * [Amal Saif](mailto:ml4-sci@cern.ch) (Princess Sumaya University for Technology) + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**.
The mentors will then get in touch with you. + + +## Links + * [Paper 1](https://arxiv.org/abs/2104.01725) + * [Paper 2](https://ml4physicalsciences.github.io/2020/files/NeurIPS_ML4PS_2020_138.pdf) + * [Paper 3](https://arxiv.org/abs/1605.02684) diff --git a/_gsocproposals/2026/proposal_FALCON3.md b/_gsocproposals/2026/proposal_FALCON3.md new file mode 100644 index 00000000..d98b98c9 --- /dev/null +++ b/_gsocproposals/2026/proposal_FALCON3.md @@ -0,0 +1,47 @@ +--- +title: Graph Transformers for Fast Detector Simulation +layout: gsoc_proposal +project: DeepFALCON +year: 2026 +organization: + - Alabama + - EPFL + - Middle East Technical University + - Princess Sumaya University for Technology + +--- + +## Description +[DeepFalcon](https://ml4physicalsciences.github.io/2020/files/NeurIPS_ML4PS_2020_138.pdf) is a generative ultra-fast non-parametric detector simulation package. The goal of this project is to extend DeepFalcon to include diffusion models to improve simulation of calorimeter and tracker hits from particle interaction with the detectors + +## Task ideas and expected results +* Extension of the Graph VAE model with Transformer architecture to improve edge prediction + +## Duration + +Total project length: 175/350 hours. + + + +## Requirements +Strong machine learning skills, good knowledge of C++ and Python. Interest in Machine Learning algorithms and applications. + +## Mentors + * [Ali Hariri](mailto:ml4-sci@cern.ch) (EPFL) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Sinan Gençoğlu](mailto:ml4-sci@cern.ch) (Middle East Technical University) + * [Amal Saif](mailto:ml4-sci@cern.ch) (Princess Sumaya University for Technology) + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
+ + + +## Links + * [Paper 1](https://arxiv.org/abs/2104.01725) + * [Paper 2](https://ml4physicalsciences.github.io/2020/files/NeurIPS_ML4PS_2020_138.pdf) + * [Paper 3](https://arxiv.org/abs/1605.02684) diff --git a/_gsocproposals/2026/proposal_FALCON4.md b/_gsocproposals/2026/proposal_FALCON4.md new file mode 100644 index 00000000..9934f167 --- /dev/null +++ b/_gsocproposals/2026/proposal_FALCON4.md @@ -0,0 +1,41 @@ +--- +title: Optimal Transport in High Energy Physics +layout: gsoc_proposal +project: DeepFALCON +year: 2026 +organization: + - Alabama + - EPFL + - Middle East Technical University + - Princess Sumaya University for Technology + - CERN + +--- + +## Description +The concept of Optimal Transport (OT) can be very useful in quantifying a distance metric between probability distributions. This project will focus on applying optimal transport methods to classification, anomaly detection and generative modeling tasks in particle physics. + +## Duration + +Total project length: 175/350 hours. + +## Task ideas and expected results +Application of optimal transport techniques using deep learning for classification, anomaly detection and graph generative models in high energy physics. + + + +## Requirements +Strong machine learning skills, good knowledge of C++ and Python. Interest in Machine Learning algorithms and applications. + +## Mentors + * [Ali Hariri](mailto:ml4-sci@cern.ch) (EPFL) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Sinan Gençoğlu](mailto:ml4-sci@cern.ch) (Middle East Technical University) + * [Amal Saif](mailto:ml4-sci@cern.ch) (Princess Sumaya University for Technology) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
+ diff --git a/_gsocproposals/2026/proposal_FASEROH.md b/_gsocproposals/2026/proposal_FASEROH.md new file mode 100644 index 00000000..bb239d5d --- /dev/null +++ b/_gsocproposals/2026/proposal_FASEROH.md @@ -0,0 +1,40 @@ +--- +title: Fast Accurate Symbolic Empirical Representation Of Histograms +layout: gsoc_proposal +project: FASEROH +year: 2026 +organization: + - Kentucky + - FSU +--- + +## Description + +State-of-the-art sequence to sequence models (seq2seq) have yielded spectacular advances in neural machine translation (NMT) (see, for example, [Ref1](https://arxiv.org/pdf/1912.02047.pdf) ). Recently, these models have been successfully applied to symbolic mathematics by conceptualizing the latter as translation from one sequence of symbols to another ( [Ref2](https://arxiv.org/abs/1912.01412) ). It is easy to imagine numerous tasks that can be construed as translations. In the proposed GSoC project the goal is to create a tool that automatically provides an accurate symbolic representation of a histogram by construing the problem as one of translation from a histogram to a symbolic function. We call the project Fast Accurate Symbolic Empirical Representation Of Histograms (FASEROH). + +## Duration + +Total project length: 175/350 hours. + +## Task ideas + * The goal of the project is to use available seq2seq models to create the mapping between a histogram and a symbolic function. See more details [here](https://ml4sci.org/assets/faseroh.pdf). + + +## Expected results + * Since this project will be a proof of principle, the seq2seq task will be limited to 1-dimensional histograms defined on the unit interval. + +## Requirements +Python, previous experience in Machine Learning. + + + + +## Mentors + * [Abdulhakim Alnuqaydan](mailto:ml4-sci@cern.ch) (University of Kentucky) + * [Harrison Prosper](mailto:ml4-sci@cern.ch) (Florida State University) + + +Please **DO NOT** contact mentors directly by email.
Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_GENIE1.md b/_gsocproposals/2026/proposal_GENIE1.md new file mode 100644 index 00000000..6aa4b566 --- /dev/null +++ b/_gsocproposals/2026/proposal_GENIE1.md @@ -0,0 +1,52 @@ +--- +title: Deep Graph anomaly detection with contrastive learning for new physics searches +layout: gsoc_proposal +project: GENIE +year: 2026 +organization: + - Alabama + - EPFL + - TUM +--- + +## Description +In the search for new physics at the [Large Hadron Collider (LHC)](https://home.cern/science/accelerators/large-hadron-collider) a possible approach is to employ anomaly detection techniques to spot events that deviate from the standard model in an unsupervised manner. There have been many such studies using e.g. convolutional autoencoders. In previous GSoC projects, the usage of graph based models has been very successful in generative tasks. In this project we therefore want to employ a graph based architecture to perform anomaly detection on particle collision data. + +The intended model is supposed to perform anomaly detection on a graph-level, corresponding literature can be found in the ‘Links’ section. + +## Duration + +Total project length: 175/350 hours. + +## Difficulty Level + * Intermediate/Advanced + +## Task ideas + * Development of a model for graph based anomaly detection + * Benchmarking on benchmark datasets and comparison to a convolutional autoencoder + +## Expected results + * Trained graph based anomaly detection model + * Benchmark on selected datasets + +## Test +Please use [this link](https://docs.google.com/document/d/1lRJocBJ0MgrTTth07xAinz_2tZRgPJF6weVkR8TGPxI/edit?usp=sharing) to access the test for this project. + +## Requirements + * Skills: Python, PyTorch or TensorFlow and some previous experience in Machine Learning.
+ * Ability to work independently and proactively on a research project + +## Mentors + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Ali Hariri](mailto:ml4-sci@cern.ch) (EPFL) + * [Amal Saif](mailto:ml4-sci@cern.ch) (PSUT) + * [Tom Magorsch](mailto:ml4-sci@cern.ch) (TUM) + +## Links + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + * [Paper 1](https://doi.org/10.1038/s41598-022-22086-3) + * [Paper 2](https://arxiv.org/abs/2104.01725) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + + diff --git a/_gsocproposals/2026/proposal_GENIE2.md b/_gsocproposals/2026/proposal_GENIE2.md new file mode 100644 index 00000000..0be04dea --- /dev/null +++ b/_gsocproposals/2026/proposal_GENIE2.md @@ -0,0 +1,48 @@ +--- +title: Learning Parametrization with Implicit Neural Representations +layout: gsoc_proposal +project: GENIE +year: 2026 +organization: + - Alabama + - EPFL + - Princess Sumaya University for Technology + - BITS Pilani Goa +--- + +## Description +In the search for new physics at the [Large Hadron Collider (LHC)](https://home.cern/science/accelerators/large-hadron-collider) it is necessary to accurately learn the representation of events that may be described in different ways (point clouds, graphs, grids). Different detector systems can lead to different optimal representations and no single approach is ideal for all detector systems. Conventional representations are usually discrete (point clouds, grids etc.). This project focuses on an alternative approach of parametrizing the representation in terms of a continuous function and approximating it with a neural network. + + +## Duration + +Total project length: 175/350 hours.
+ +## Difficulty Level + * Intermediate/Advanced + +## Task ideas + * Develop implicit neural representation model for particle physics data + * Benchmarking on benchmark datasets + +## Expected results + * Functional INR model + * Benchmark results on selected datasets + +## Test +Please use [this link](https://docs.google.com/document/d/1lRJocBJ0MgrTTth07xAinz_2tZRgPJF6weVkR8TGPxI/edit?usp=sharing) to access the test for this project. + +## Requirements + * Skills: Python, PyTorch or Keras and some previous experience in Machine Learning. + + +## Mentors + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Ali Hariri](mailto:ml4-sci@cern.ch) (EPFL) + * [Amal Saif](mailto:ml4-sci@cern.ch) (PSUT) + +## Links + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + diff --git a/_gsocproposals/2026/proposal_GENIE3.md b/_gsocproposals/2026/proposal_GENIE3.md new file mode 100644 index 00000000..c6c91c15 --- /dev/null +++ b/_gsocproposals/2026/proposal_GENIE3.md @@ -0,0 +1,47 @@ +--- +title: Learning the Latent Structure with Diffusion Models +layout: gsoc_proposal +project: GENIE +year: 2026 +organization: + - Alabama + - EPFL + - Princess Sumaya University for Technology + - BITS Pilani Goa +--- + +## Description +In the search for new physics at the [Large Hadron Collider (LHC)](https://home.cern/science/accelerators/large-hadron-collider) it is necessary to simulate billions of high-energy collision events at high fidelity. One approach is to use accurate generative modeling to sample from latent space distribution. 
This project focuses on diffusion models as means of learning the latent structure to produce accurate multidimensional distribution of point cloud data of hits produced by particle interactions with the detectors. + +## Duration + +Total project length: 175/350 hours. + +## Difficulty Level + * Intermediate/Advanced + +## Task ideas + * Develop diffusion models for learning the latent space structure + * Benchmarking on benchmark datasets and comparison to other generative models (VAEs/GANs/etc.) + +## Expected results + * Functional diffusion model + * Benchmarks on selected datasets + +## Test +Please use [this link](https://docs.google.com/document/d/1lRJocBJ0MgrTTth07xAinz_2tZRgPJF6weVkR8TGPxI/edit?usp=sharing) to access the test for this project. + +## Requirements + * Skills: Python, PyTorch or Keras and some previous experience in Machine Learning. + + +## Mentors + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Ali Hariri](mailto:ml4-sci@cern.ch) (EPFL) + * [Amal Saif](mailto:ml4-sci@cern.ch) (PSUT) + + +## Links + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
diff --git a/_gsocproposals/2026/proposal_GENIE4.md b/_gsocproposals/2026/proposal_GENIE4.md new file mode 100644 index 00000000..a92045e5 --- /dev/null +++ b/_gsocproposals/2026/proposal_GENIE4.md @@ -0,0 +1,48 @@ +--- +title: Non-local GNNs for Jet Classification +layout: gsoc_proposal +project: GENIE +year: 2026 +organization: + - Alabama + - EPFL + - Princess Sumaya University for Technology + - BITS Pilani Goa +--- + +## Description +In the search for new physics at the [Large Hadron Collider (LHC)](https://home.cern/science/accelerators/large-hadron-collider) a possible approach is to employ anomaly detection techniques to spot events that deviate from the standard model in an unsupervised manner. There have been many such studies using e.g. convolutional autoencoders. In previous GSoC projects, the usage of graph-based models has been very successful in generative tasks. Motivated by the success of graph-based models in various computational tasks, this project seeks to leverage non-local graph neural networks (GNNs) for the classification of jets in particle physics. Unlike conventional methods, which treat jets as independent entities, the proposed approach capitalizes on the inherent relational structure among particles within a jet, represented as a graph. The challenge is to account for the long-range dependencies inherent to the jets, which regular GNNs fail to do. + + +## Duration + +Total project length: 175/350 hours. + +## Difficulty Level + * Intermediate/Advanced + +## Task ideas + * Develop a model for graph-based jet classification while accounting for long-range dependencies + * Benchmarking on benchmark datasets and comparison to a Transformer and regular MPNNs + +## Expected results + * Trained graph-based jet classifier + * Benchmarks on selected datasets + +## Test +Please use [this link](https://docs.google.com/document/d/1lRJocBJ0MgrTTth07xAinz_2tZRgPJF6weVkR8TGPxI/edit?usp=sharing) to access the test for this project.
+ +## Requirements + * Skills: Python, PyTorch or Keras and some previous experience in Machine Learning. + + +## Mentors + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Ali Hariri](mailto:ml4-sci@cern.ch) (EPFL) + * [Tom Magorsch](mailto:ml4-sci@cern.ch) (TUM) + + +## Links + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_GENIE5.md b/_gsocproposals/2026/proposal_GENIE5.md new file mode 100644 index 00000000..f9dcaca6 --- /dev/null +++ b/_gsocproposals/2026/proposal_GENIE5.md @@ -0,0 +1,71 @@ +--- +title: Physics-Informed Neural Network Diffusion Equation (PINNDE) +layout: gsoc_proposal +project: GENIE +year: 2026 +organization: + - Alabama + - FSU + - Fermilab +--- + +## Description +There is much interest in building ultra-fast samplers that map a density that is easy to sample from, typically, an n-dimensional normal to a desired n-dimensional density. One way to compute this mapping is to solve the reverse-time diffusion equation [1], which is an integro-differential equation. In Ref. [2], the integral in this equation is approximated using Monte Carlo integration where the integrand is averaged over N (~5K – 10K) points sampled from the desired distribution. Solving this equation is relatively slow, therefore, typically a neural network is trained to model the mapping from the normal to the desired density using training data generated by repeatedly solving the differential equation. + +In this project, an alternative approach is investigated: modeling the solution to the differential equation using a physics-informed neural network (PINN) [3]. 
There is a large upfront cost in training the PINN, but this is subsequently amortized over the fast sampling using the PINN. Various neural network architectures for the PINN will be investigated. + + +## Duration + +Total project length: 175/350 hours. + +## Difficulty Level + * Intermediate/Advanced + +## Task ideas +* Map a 3D zero mean, unit variance, diagonal normal to a 3D non-Gaussian density using a PINN. The inputs to the PINN are t, x, y, z — that is, the reverse time t ∈ [1, 0] and a point sampled from the 3D normal. The output of the PINN is the vector solution **u**(t, x, y, z). Since the PINN is conditioned on x, y, z, during training the points can be sampled from any convenient distribution, including quasi-random sampling such as Sobol sampling. (Of course, when used we must sample from a 3D normal.) +* Repeat with increasingly complex 3D non-Gaussian densities. +* Optional: Apply what has been learned from the first two tasks to build a fast calorimeter simulator. Use Dataset 1 from the Fast Calorimeter Simulation Challenge 2022 [4]. +* Publish the results in an ML paper. + +## Expected results +* Trained PINN that maps a 3D normal to the target 3D non-Gaussian densities +* Benchmarks on selected datasets + +## Test +* Using PyTorch, solve the damped harmonic oscillator [5] using a PINN. Choose fixed initial conditions: + x(0) = x₀, dx/dz(0) = v₀, with x₀ = 0.7 and v₀ = 1.2. + Condition the PINN on damping ratios in the range ξ = 0.1 to 0.4. + Solve on the domain z ∈ [0, 20]: + d²x/dz² + 2ξ·dx/dz + x = 0 + + + +## Requirements + * Experience with numerical solution of ordinary differential equations. + * Familiarity with PyTorch. + +## Difficulty Level +Advanced + +## Mentors + * [Harrison B. Prosper](mailto:ml4-sci@cern.ch) (Florida State University) + * [Pushpalatha Bhat](mailto:ml4-sci@cern.ch) (Fermilab) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + + +## Links +1.
Cheng Lu, Yuhao Zhou, Fan Bao, Jianfei Chen, Chongxuan Li, Jun Zhu, DPM-Solver: A Fast ODE Solver for Diffusion Probabilistic Model Sampling in Around 10 Steps, arXiv:2206.00927v3, 13 Oct 2022. + +2. Yanfang Liu, Minglei Yang, Zezhong Zhang, Feng Bao, Yanzhao Cao, and Guannan Zhang, Diffusion-Model-Assisted Supervised Learning of Generative Models for Density Estimation, arXiv:2310.14458v1, 22 Oct 2023. + +3. S. Cuomo et al., Scientific Machine Learning through Physics-Informed Neural Networks: Where we are and What's next, https://doi.org/10.48550/arXiv.2201.05624. + +4. https://calochallenge.github.io/homepage/ + +5. https://en.wikipedia.org/wiki/Harmonic_oscillator + + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_ML4DQM1.md b/_gsocproposals/2026/proposal_ML4DQM1.md new file mode 100644 index 00000000..eb326c78 --- /dev/null +++ b/_gsocproposals/2026/proposal_ML4DQM1.md @@ -0,0 +1,47 @@ +--- +title: Continuous learning for high-energy physics data quality monitoring +layout: gsoc_proposal +project: ML4DQM +year: 2026 +organization: + - Alabama + - Cerium + +--- + +## Description + + +A key challenge in data quality monitoring in high-energy physics is the need for online monitoring and control of the experiment with the data that is sensitive to underlying conditions and the constantly evolving state of the detector components. Machine learning models can be useful in identifying anomalies in the data and monitoring the quality of the data. At the same time, continuous learning techniques may be necessary to avoid machine learning model sensitivity to changing data inputs, avoiding the need to frequently re-train models.
This proposal seeks to address this challenge by exploring continuous learning models capable of adapting to changing detector conditions and systems over time. + +## Duration + +Total project length: 175 hours. + +## Task ideas + * Develop continuous machine learning models. + * Evaluate and Benchmark model performance and robustness to changing detector conditions. + +## Expected results + * Build a continuous machine learning model pipeline + * Evaluate and Benchmark the models with realistic datasets + +## Requirements +C++, Python, PyTorch, Tensorflow, previous experience in Deep Learning. + +## Project difficulty level +Medium + +## Mentors + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Dale Julson](mailto:ml4-sci@cern.ch) (Cerium Labs) + * [Resham Sohal](mailto:ml4-sci@cern.ch) (University of Alabama) + + +## Test +Solve the evaluation tasks at [this link](https://docs.google.com/document/d/e/2PACX-1vT4zwyd9cIC1mibfDgavyM6mhm3f2g4c2J47KANd7N5DaGuzWGFAoT4zp7TMH3QtUdq8u1M-c1bQmMg/pub). Please send us your CV and a link to all your completed work (github repo, Jupyter notebook + pdf of Jupyter notebook with output) to [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Evaluation Test: ML4DQM in the title. + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
+ + diff --git a/_gsocproposals/2026/proposal_NeuroDyad1.md b/_gsocproposals/2026/proposal_NeuroDyad1.md new file mode 100644 index 00000000..6861b399 --- /dev/null +++ b/_gsocproposals/2026/proposal_NeuroDyad1.md @@ -0,0 +1,56 @@ +--- +title: CEBRA-Based Data Processing Pipeline for Mapping Time-Locked EEG Paired Sets in Interacting Participants +layout: gsoc_proposal +project: NEURODYAD +year: 2026 +organization: + - Alabama +--- + +## Description + +This project aims to develop a computational pipeline using the Contrastive Embedding for Behavioral and Neural Analysis (CEBRA) method to analyze time-locked EEG data from interacting participants. The goal is to map neural dynamics within dyads, particularly in Speaker-Listener interactions, and to identify parameters that distinguish neurotypical participants from those with clinical diagnoses. The project will use CEBRA algorithm for low-dimensional, interpretable latent embeddings for: + * Mapping EEG signals from interacting dyads to uncover neural patterns underlying communication (speaking vs. listening). + * Exploring differences in feature spaces (e.g., neural synchrony, boundary conditions, or latent manifold properties) that characterize clinical versus neurotypical interactions. +This research is relevant for understanding how neural dynamics in social interaction are shaped by neurotype differences, such as those in Autism Spectrum Disorder (ASD). While CEBRA was originally designed for linking neural and behavioral data, the project aims to adapt it for clinical population comparison in a clinically relevant machine learning application. + + + +## Duration + +Total project length: 175/350 hours. + +## Task ideas + * Preprocess EEG recordings and format data to be compatible with CEBRA’s embedding architecture. + * Implement a pipeline for CEBRA-based mapping of time-locked EEG data from dyads (64-channel EEG). 
+ * Train the model on neurotypical and clinical dyads, analyzing how latent embeddings encode neural interactions. + * Identify differentiating features (e.g., variance in manifold structure, neural synchrony, connectivity patterns) that may characterize clinical interactions. + + + +## Requirements +MATLAB, Python; solid understanding of linear algebra, topology, signal processing. Interest in clinical neuroscience and computational methods. + +## Expected results + * A functional pipeline for mapping time-locked EEG hyperscans from interacting participants using CEBRA. + * Identified neural parameters that differentiate clinical from neurotypical populations. + + +## Difficulty Level +Intermediate/Advanced + + +## Mentors + * Evie Malaia (University of Alabama) + * Brendan Ames (University of Southampton, UK) + +## Links + * [CEBRA Pipeline Docs](https://github.com/AdaptiveMotorControlLab/CEBRA) + * [Paper 1 (Schneider et al., 2023 (CEBRA Paper))](https://arxiv.org/abs/2204.00673) + * [Paper 2 ](https://arxiv.org/abs/2405.04248) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + diff --git a/_gsocproposals/2026/proposal_QMLHEP1.md b/_gsocproposals/2026/proposal_QMLHEP1.md new file mode 100644 index 00000000..569628b1 --- /dev/null +++ b/_gsocproposals/2026/proposal_QMLHEP1.md @@ -0,0 +1,69 @@ +--- +title: Implementation of Quantum Generative Adversarial Networks to Perform High Energy Physics Analysis at the LHC +layout: gsoc_proposal +project: QMLHEP +year: 2026 +organization: + - Alabama + - Wisconsin + - TUM + - BitsPilani + - BUAP +--- + +## Description +The ambitious HL-LHC program will require enormous computing resources in the next two decades. New technologies are being sought to replace the present computing infrastructure. 
A burning question is whether quantum computers can solve the ever-growing demand for computing resources in High-Energy Physics (HEP) in general and physics at LHC in particular. + +Discovery of new physics requires the identification of rare signals in immense backgrounds. The development of machine learning methods will greatly enhance our ability to achieve this objective. + +With this project we seek to implement Quantum Machine Learning methods for LHC HEP analysis based on the Pennylane framework. This will enhance the ability of the HEP community to use Quantum Machine Learning methods. + +## Duration + +Total project length: 175/350 hours. + +## Task ideas + * Implement a Quantum Generative Adversarial Network (QGAN) based on a suitable framework, e.g. Pennylane. See e.g. papers for possible models under ‘Links’ + * Verify that it works and no mode collapse happens using the MNIST dataset. + * Apply the quantum machine learning method to one LHC flagship physics channel (e.g. double-Higgs production). Compare the quantum machine learning performance to the classical machine learning performance. + * Train a classical GAN and a QGAN on exactly the same tasks and datasets, serving as a direct benchmark. Measure differences in training speed, and the fidelity or quality of generated samples. + * Develop and implement a QGAN that successfully captures the underlying, or “implicit,” probability distribution of the training data and compare the generated and real distributions using distribution-similarity measures. + + +## Expected results + * Trained Quantum Generative Adversarial Network method based on e.g. Pennylane framework. + * Successfully apply the Quantum Machine Learning method to LHC physics analyses and obtain performance benchmarks to compare to classical machine learning methods. + * A comprehensive comparison of the training time, mode collapse frequency, sample quality scores between QGAN and classical GAN. 
+ * Demonstrate that the QGAN can produce samples whose statistical properties match those of the real dataset. + + + + +## Requirements + * Solid knowledge of machine learning and deep learning + * Knowledge of quantum mechanics + * Strong Python skills + * Ability to work independently and proactively on a research project + +## Difficulty Level + * Intermediate/Advanced + +## Mentors + * [Rui Zhang](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Tom Magorsch](mailto:ml4-sci@cern.ch) (TUM) + * [Abhay Kamble](mailto:ml4-sci@cern.ch) (BitsPilani) + * [Isabel Pedraza](mailto:ml4-sci@cern.ch) (Benemérita Universidad Autónoma de Puebla) + +## Links + * [HL-LHC](https://hilumilhc.web.cern.ch) + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + * [Pennylane](https://pennylane.ai) + * [Paper 1](https://arxiv.org/abs/2105.00080) + * [Paper 2](https://arxiv.org/abs/2210.16857) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_QMLHEP10.md b/_gsocproposals/2026/proposal_QMLHEP10.md new file mode 100644 index 00000000..38040d01 --- /dev/null +++ b/_gsocproposals/2026/proposal_QMLHEP10.md @@ -0,0 +1,60 @@ +--- +title: Quantum Kolmogorov-Arnold Networks for High Energy Physics Analysis at the LHC +layout: gsoc_proposal +project: QMLHEP +year: 2026 +organization: + - Alabama + - Kansas +--- + +## Description +The ambitious HL-LHC program will require enormous computing resources in the next two decades. New technologies are being sought after to replace the present computing infrastructure.
A burning question is whether quantum computers can solve the ever-growing demand for computing resources in High Energy Physics (HEP) in general and physics at LHC in particular. + +Discovery of new physics requires the identification of rare signals against immense backgrounds. Development of machine learning methods will greatly enhance our ability to achieve this objective. With this project we seek to implement Quantum Machine Learning methods for LHC HEP analysis based on the Pennylane framework. This will enhance the ability of the HEP community to use Quantum Machine Learning methods. + + +## Duration + +Total project length: 175 hours. + +## Task ideas + * Implement a quantum Kolmogorov-Arnold Network architecture. + * Benchmark the trained model on selected tasks + + +## Expected results + * Trained quantum Kolmogorov-Arnold Network model + * Benchmark of the performance on a HEP dataset compared against a classical reference model + + + + + +## Requirements + * Solid knowledge of machine learning and deep learning + * Knowledge of quantum mechanics and linear algebra + * Strong Python skills + * Ability to work independently and proactively on a research project + +## Difficulty Level + * Intermediate/Advanced + +## Mentors + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Dinesh Ramakrishnan](mailto:ml4-sci@cern.ch) (University of Alabama) + * [KC Kong](mailto:ml4-sci@cern.ch) (University of Kansas) + + +## Links + * [HL-LHC](https://hilumilhc.web.cern.ch) + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + * [Pennylane](https://pennylane.ai) + * [Paper 1](https://arxiv.org/pdf/2404.19756) + * [Paper 2](https://www.frontiersin.org/journals/artificial-intelligence/articles/10.3389/frai.2024.1462952/full) + * [Paper 3](https://arxiv.org/pdf/2410.04435) + +Please **DO NOT** contact mentors directly by email.
Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_QMLHEP11.md b/_gsocproposals/2026/proposal_QMLHEP11.md new file mode 100644 index 00000000..146a1af1 --- /dev/null +++ b/_gsocproposals/2026/proposal_QMLHEP11.md @@ -0,0 +1,58 @@ +--- +title: Quantum Foundation Model for High Energy Physics +layout: gsoc_proposal +project: QMLHEP +year: 2026 +organization: + - Alabama + - Wisconsin + - TUM +--- + +## Description +The ambitious HL-LHC program will require enormous computing resources and datasets in the next two decades. New technologies are being sought after to replace the present computing infrastructure. A burning question is whether quantum computers can solve the ever growing demand of computing resources in High Energy Physics (HEP) in general and physics at LHC in particular. Our goal here is to explore and to demonstrate that Quantum Computing can be the new paradigm (Proof of Principle). +Discovery of new physics requires the identification of rare signals against immense backgrounds. Development of machine learning methods will greatly enhance our ability to achieve this objective. However, with this ever-growing volume of data in the near future, current machine learning algorithms will require large computing resources and excessive computing time to achieve good performance. Quantum Computing in Qubit platform, where qubits are used instead of bits in classical computers, has the potential to improve the time complexity of classical algorithms. + +With this project we seek to implement Quantum Foundation Model for LHC HEP analysis based on the Pennylane framework. This will enhance the ability of the HEP community to use Quantum Machine Learning methods. + +## Duration + +Total project length: 175 hours. 
+ +## Task ideas + * Design and implement a quantum foundation model architecture for LHC data + * Benchmark the trained model on selected tasks by employing a classical model + * Analyze scalability and computational complexity of the proposed model + + +## Expected results + * Trained quantum foundation model + * Benchmark of the performance on a HEP dataset compared against a classical reference model + + + +## Requirements + * Strong background in Machine Learning & Deep Learning. + * Knowledge of Quantum Computing (VQAs, Quantum Optimization). + * Proficiency in Python & Pennylane. + * Ability to work independently on research projects. + +## Difficulty Level + * Advanced – requires expertise in Quantum ML and HEP data analysis. + +## Mentors + * [Rui Zhang](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Konstantin Matchev](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Tom Magorsch](mailto:ml4-sci@cern.ch) (TUM) + + +## Links + * [HL-LHC](https://hilumilhc.web.cern.ch) + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + * [Pennylane](https://pennylane.ai) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_QMLHEP12.md b/_gsocproposals/2026/proposal_QMLHEP12.md new file mode 100644 index 00000000..ce0520be --- /dev/null +++ b/_gsocproposals/2026/proposal_QMLHEP12.md @@ -0,0 +1,53 @@ +--- +title: Quantum Machine Learning for Exoplanet Characterization +layout: gsoc_proposal +project: QMLHEP +year: 2026 +organization: + - University of Alabama + - Georgia +--- + +#### Note +This project is a collaboration with EXXA and QMLHEP.
+ +#### Description + +The characterization of exoplanet atmospheres is crucial for understanding their compositions, weather patterns, and potential habitability. This project aims to develop quantum machine learning models to analyze spectral data from exoplanets, identifying chemical abundances, cloud/haze structure and different atmospheric processes. The project will leverage data from telescopes and space missions, along with simulations of exoplanetary atmospheres under various conditions, to train and validate the models. + +#### Duration + +Total project length: 175/350 hours. + +#### Task Ideas + * Identify suitable latent representations of the exoplanet transmission data. + * Develop a quantum machine learning architecture for detecting anomalous exoplanets based on synthetic transmission spectra. + * Develop a quantum generative model for simulating exoplanet transmission spectra. + * Apply the trained models to real observational data from missions like Hubble, JWST, and future telescopes to characterize exoplanet atmospheres. + * Benchmark the performance of the developed quantum machine learning models against their classical counterparts. + + +#### Expected Results + * A set of quantum machine learning models capable of accurately modeling exoplanet atmospheres or flagging anomalous spectra. + * Analysis of the models’ performance on observational data, demonstrating their applicability to current and future exoplanet studies. + + +## Requirements + * Python + * PyTorch or TensorFlow (or similar) + * Some experience with Qiskit or Pennylane is preferred + * Background in astronomy is a bonus but not a requirement + + +## Test +Use [this link](https://docs.google.com/document/d/1t2cSxEx3vIa6uirfkMkF92rWZM4tTJ_V-lkpYSdukVQ/edit?usp=sharing) for instructions on completing the test. 
+ +## Mentors + +* [Katia Matcheva](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Konstantin Matchev](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Alex Roman](mailto:ml4-sci@cern.ch) (University of Alabama) +* [Emilie Panek](mailto:ml4-sci@cern.ch) (University of Alabama) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_QMLHEP2.md b/_gsocproposals/2026/proposal_QMLHEP2.md new file mode 100644 index 00000000..a3721468 --- /dev/null +++ b/_gsocproposals/2026/proposal_QMLHEP2.md @@ -0,0 +1,59 @@ +--- +title: Quantum Graph Neural Networks for High Energy Physics Analysis at the LHC +layout: gsoc_proposal +project: QMLHEP +year: 2026 +organization: + - Alabama + - Wisconsin + - Kansas + - Florida +--- + +## Description +The ambitious HL-LHC program will require enormous computing resources in the next two decades. New technologies are being sought to replace the present computing infrastructure. A burning question is whether quantum computers can solve the ever-growing demand for computing resources in High-Energy Physics (HEP) in general and physics at LHC in particular. Discovery of new physics requires the identification of rare signals against immense backgrounds. The development of machine learning methods will greatly enhance our ability to achieve this objective. With this project we seek to implement Quantum Machine Learning methods for LHC HEP analysis based on the Pennylane framework. This will enhance the ability of the HEP community to use Quantum Machine Learning methods. + +## Duration + +Total project length: 175/350 hours. + +## Task ideas + * Implement a Quantum Graph Neural Network (QGNN) method based on a suitable framework e.g. Pennylane. 
+ * Apply the quantum machine learning method to a benchmark high-energy physics analysis and benchmark the quantum machine learning performance compared to classical machine learning methods + +## Expected results + * Trained Quantum Graph Neural Network with e.g. Pennylane framework. + * Apply the Quantum Machine Learning method to LHC physics analysis and compare to classical machine learning methods. + + + +## Requirements + * Solid knowledge of machine learning and deep learning + * Knowledge of quantum mechanics desired + * Strong python skills + * Ability to work independently and proactive on a research project + +## Difficulty Level + * Intermediate/Advanced + +## Mentors + * [Rui Zhang](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Alkaid Cheng](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [KC Kong](mailto:ml4-sci@cern.ch) (University of Kansas) + * [Roy Forestano](mailto:ml4-sci@cern.ch) (University of Florida) + +## Links + * [HL-LHC](https://hilumilhc.web.cern.ch) + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + * [Pennylane](https://pennylane.ai) + * [Paper 1](https://arxiv.org/abs/2201.05158) + * [Paper 2](https://arxiv.org/abs/2103.10837) + * [Paper 3](https://arxiv.org/abs/2112.06088) + * [Paper 4](https://arxiv.org/abs/1909.12264) + * [Paper 5](https://arxiv.org/abs/2311.18672) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
diff --git a/_gsocproposals/2026/proposal_QMLHEP4.md b/_gsocproposals/2026/proposal_QMLHEP4.md new file mode 100644 index 00000000..d03a6b2c --- /dev/null +++ b/_gsocproposals/2026/proposal_QMLHEP4.md @@ -0,0 +1,59 @@ +--- +title: Equivariant quantum neural networks for High Energy Physics Analysis at the LHC +layout: gsoc_proposal +project: QMLHEP +year: 2026 +organization: + - Alabama + - Wisconsin + - Kansas +--- + +## Description +The ambitious HL-LHC program will require enormous computing resources in the next two decades. New technologies are being sought after to replace the present computing infrastructure. A burning question is whether a quantum computer can solve the ever growing demand of computing resources in High Energy Physics (HEP) in general and physics at LHC in particular. Discovery of new physics requires the identification of rare signals against immense backgrounds. Developing machine learning methods will greatly enhance our ability to achieve this objective. With this project, we seek to implement Quantum Machine Learning (QML) methods for LHC HEP analysis based on some QML frameworks (PennyLane, Cirq, Bloqade, …). This will enhance the ability of the HEP community to use QML methods. + +## Duration + +Total project length: 175/350 hours. + +## Task ideas + * Implement an equivariant quantum neural network. + * Test the equivariant quantum model with HEP datasets + * Benchmark the trained model and compare it against classical and non-equivariant models. + +## Expected results + * Trained equivariant quantum neural networks with a QML framework (PennyLane, Cirq, Bloqade, etc.). 
+ * Benchmark of the performance against a non-equivariant model + + + +## Requirements + * Solid knowledge of machine learning and deep learning + * Knowledge of quantum mechanics + * Strong python skills + * Ability to work independently and proactive on a research project + * Basic mathematics of group theory + +## Difficulty Level + * Intermediate/Advanced + +## Mentors + * [Rui Zhang](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Alkaid Cheng](mailto:ml4-sci@cern.ch) (University of Wisconsin Madison) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [KC Kong](mailto:ml4-sci@cern.ch) (University of Kansas) + * [Alex Roman](mailto:ml4-sci@cern.ch) (University of Alabama) + + +## Links + * [HL-LHC](https://hilumilhc.web.cern.ch) + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + * [Pennylane](https://pennylane.ai) + * [Paper 1](https://arxiv.org/abs/2210.08566) + * [Paper 2](https://arxiv.org/abs/2311.18672) + * [Paper 3](https://arxiv.org/abs/2212.00264) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_QMLHEP5.md b/_gsocproposals/2026/proposal_QMLHEP5.md new file mode 100644 index 00000000..0009cf89 --- /dev/null +++ b/_gsocproposals/2026/proposal_QMLHEP5.md @@ -0,0 +1,59 @@ +--- +title: Quantum transformer for High Energy Physics Analysis at the LHC +layout: gsoc_proposal +project: QMLHEP +year: 2026 +organization: + - Alabama + - Wisconsin + - PUC +--- + +## Description +The ambitious HL-LHC program will require enormous computing resources in the next two decades. New technologies are being sought after to replace the present computing infrastructure. 
A burning question is whether a quantum computer can solve the ever-growing demand of computing resources in High Energy Physics (HEP) in general and physics at LHC in particular. + +Discovery of new physics requires the identification of rare signals against immense backgrounds. Development of machine learning methods will greatly enhance our ability to achieve this objective. With this project we seek to implement Quantum Machine Learning methods for LHC HEP analysis based on the Pennylane framework. This will enhance the ability of the HEP community to use Quantum Machine Learning methods. + + +## Duration + +Total project length: 175 hours. + +## Task ideas + * Implement a quantum transformer architecture (QVIT, QTF). + * Benchmark the trained model on selected tasks by e.g. employing a hybrid transformer (vision and sequence) + +## Expected results + * Trained quantum transformer model. + * Benchmark of the performance on a HEP dataset compared against a classical reference model + + + +## Requirements + * Solid knowledge of machine learning and deep learning + * Knowledge of quantum mechanics desired + * Strong Python skills + * Ability to work independently and proactively on a research project + +## Difficulty Level + * Intermediate/Advanced + +## Mentors + * [Rui Zhang](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Alkaid Cheng](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Konstantin Matchev](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Marçal Comajoan Cara](mailto:ml4-sci@cern.ch) (Polytechnic University of Catalonia) + +## Links + * [HL-LHC](https://hilumilhc.web.cern.ch) + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + * [Pennylane](https://pennylane.ai/) + * [Paper 1](https://arxiv.org/abs/2110.06510) + * [Paper 2](https://arxiv.org/abs/2209.08167) + 
+Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_QMLHEP6.md b/_gsocproposals/2026/proposal_QMLHEP6.md new file mode 100644 index 00000000..03920e5a --- /dev/null +++ b/_gsocproposals/2026/proposal_QMLHEP6.md @@ -0,0 +1,60 @@ +--- +title: Learning quantum representations of classical high energy physics data with contrastive learning +layout: gsoc_proposal +project: QMLHEP +year: 2026 +organization: + - Alabama + - Wisconsin + - TUM +--- + +## Description +The ambitious [HL-LHC](https://hilumilhc.web.cern.ch) program will require enormous computing resources in the next two decades. New technologies are being sought after to replace the present computing infrastructure. A burning question is whether quantum computer can solve the ever growing demand of computing resources in High Energy Physics (HEP) in general and physics at [LHC](https://home.cern/science/accelerators/large-hadron-collider) in particular. Our goal here is to explore and to demonstrate that Quantum Computing can be the new paradigm (Proof of Principle). + +Discovery of new physics requires the identification of rare signals against immense backgrounds. Development of machine learning methods will greatly enhance our ability to achieve this objective. However, with this ever-growing volume of data in the near future, current machine learning algorithms will require large computing resources and excessive computing time to achieve good performance. Quantum Computing in Qubit platform, where qubits are used instead of bits in classical computer, has the potential to improve the time complexity of classical algorithms. + +With this project we seek to implement Quantum Machine Learning methods for LHC HEP analysis based on e.g. the Pennylane framework. 
This will enhance the ability of the HEP community to use Quantum Machine Learning methods. + +## Duration + +Total project length: 175/350 hours. + +## Task ideas + * Implement a trainable embedding function to encode classical data onto a quantum model with contrastive learning. Try and develop different ideas for embedding functions and contrastive losses for training. + * Benchmark the trained embedding against a standard encoding on a given QML model (e.g. a QCNN). + +## Expected results + * Trained embedding function for classical data with e.g. Pennylane framework. + * Benchmark of the performance against a standard encoding + + + + +## Requirements + * Solid knowledge of machine learning and deep learning + * Knowledge of quantum mechanics desired + * Strong python skills + * Ability to work independently and proactive on a research project + +## Difficulty Level + * Intermediate/Advanced + +## Mentors + * [Rui Zhang](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Alkaid Cheng](mailto:ml4-sci@cern.ch) (University of Wisconsin Madison) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Tom Magorsh](mailto:ml4-sci@cern.ch) (TUM) + +## Links + * [HL-LHC](https://hilumilhc.web.cern.ch) + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + * [Pennylane](https://pennylane.ai) + * [Paper 1](https://iopscience.iop.org/article/10.1088/2058-9565/ac6825) + * [Paper 2](https://arxiv.org/abs/2008.08605) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
diff --git a/_gsocproposals/2026/proposal_QMLHEP7.md b/_gsocproposals/2026/proposal_QMLHEP7.md new file mode 100644 index 00000000..5f65a337 --- /dev/null +++ b/_gsocproposals/2026/proposal_QMLHEP7.md @@ -0,0 +1,69 @@ +--- +title: Quantum Particle transformer for High Energy Physics Analysis at the LHC +layout: gsoc_proposal +project: QMLHEP +year: 2026 +organization: + - Alabama + - Wisconsin + - PUC +--- + +## Description +The ambitious HL-LHC program will require enormous computing resources in the next two decades. New technologies are being sought after to replace the present computing infrastructure. A burning question is whether quantum computer can solve the ever growing demand of computing resources in High Energy Physics (HEP) in general and physics at LHC in particular. Our goal here is to explore and to demonstrate that Quantum Computing can be the new paradigm (Proof of Principle). + +Discovery of new physics requires the identification of rare signals against immense backgrounds. The development of machine learning methods will greatly enhance our ability to achieve this objective. However, with this ever-growing volume of data in the near future, current machine learning algorithms will require large computing resources and excessive computing time to achieve good performance. Quantum Computing in the Qubit platform, where qubits are used instead of bits in classical computers, can potentially improve the time complexity of classical algorithms. + +With this project, we seek to implement Quantum Machine Learning methods for LHC HEP analysis based on the Pennylane framework. This will enhance the HEP community’s ability to use Quantum Machine Learning methods. + + +## Duration + +Total project length: 175 hours. + +## Task ideas + * Develop a baseline Particle Transformer (ParT). + * Incorporate quantum-inspired architectures into the ParT model, leveraging VQCs and QONNs to explore potential advantages in efficiency and generalization. 
+ * Train and evaluate the Q-ParT models on HEP datasets, such as jet tagging and particle classification datasets. + + +## Expected results + * Trained quantum particle transformer models. + * Benchmark of the performance on a HEP dataset compared against a classical reference model + + + + + +## Requirements + * Solid knowledge of machine learning and deep learning + * Knowledge of quantum mechanics + * Strong Python skills + * Ability to work independently and proactively on a research project + +## Difficulty Level + * Intermediate/Advanced + +## Mentors + * [Rui Zhang](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Alkaid Cheng](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Konstantin Matchev](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Marçal Comajoan Cara](mailto:ml4-sci@cern.ch) (Polytechnic University of Catalonia) + + +## Links + * [HL-LHC](https://hilumilhc.web.cern.ch) + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + * [Pennylane](https://pennylane.ai) + * [Paper 1](https://iopscience.iop.org/article/10.1088/2058-9565/ac6825) + * [Paper 2](https://arxiv.org/abs/2008.08605) + * [Paper 3](https://arxiv.org/abs/2202.03772v3) + * [Paper 4](https://arxiv.org/abs/2405.10284) + * [Paper 5](https://arxiv.org/abs/2209.08167) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
diff --git a/_gsocproposals/2026/proposal_QMLHEP8.md b/_gsocproposals/2026/proposal_QMLHEP8.md new file mode 100644 index 00000000..28293f42 --- /dev/null +++ b/_gsocproposals/2026/proposal_QMLHEP8.md @@ -0,0 +1,67 @@ +--- +title: Q-MAML - Quantum Model-Agnostic Meta-Learning for Variational Quantum Algorithms for High Energy Physics Analysis at the LHC +layout: gsoc_proposal +project: QMLHEP +year: 2026 +organization: + - Alabama + - Wisconsin +--- + +## Description +The rapid data growth at the Large Hadron Collider (LHC) presents an unprecedented challenge for computational resources. As the High Luminosity LHC (HL-LHC) era approaches, existing classical computing infrastructures will struggle to keep up with the increasing complexity of data analysis. Machine learning techniques have already demonstrated their potential in identifying rare physics signals within massive datasets, but the computational cost of model training and optimization limits their efficiency. + +Quantum Computing offers a new paradigm for tackling these challenges by leveraging Variational Quantum Algorithms (VQAs). However, training these quantum models effectively remains challenging due to barren plateaus and inefficient parameter optimization, leading to slow convergence. + +This project explores the potential of AI for Quantum Computing to improve the efficiency of quantum machine learning in High Energy Physics (HEP). By optimizing variational quantum circuits with a classical meta-learning model, we aim to accelerate convergence and reduce the computational burden of quantum optimization. This approach will be tested on real or simulated LHC data, demonstrating the feasibility of quantum-enhanced data analysis for HEP. + + +## Duration + +Total project length: 175 hours. + +## Task ideas + * (Example: Q-MAML) Implementation for HEP Tasks + * Design and implement (Q-MAML) for optimizing variational quantum circuits. 
+ * Apply (Q-MAML) to common HEP-related quantum optimization problems. + * Benchmarking on HEP Datasets + * Compare (Q-MAML-enhanced) quantum models against classical ML models. + * Analyze performance improvements in terms of convergence speed and accuracy. + + + +## Expected results + * Trained variational quantum models optimized for HEP analysis. + * Benchmarks comparing Q-MAML-enhanced quantum optimization to classical methods. + * Demonstration of improved trainability and efficiency of quantum models for LHC data. + + + + + +## Requirements + * Strong background in Machine Learning & Deep Learning. + * Knowledge of Quantum Computing (VQAs, Quantum Optimization). + * Proficiency in Python & Pennylane. + * Ability to work independently on research projects. + +## Difficulty Level + * Advanced – requires expertise in Quantum ML and HEP data analysis. + +## Mentors + * [Rui Zhang](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Alkaid Cheng](mailto:ml4-sci@cern.ch) (University of Wisconsin Madison) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Konstantin Matchev](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + + +## Links + * [HL-LHC](https://hilumilhc.web.cern.ch) + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + * [Pennylane](https://pennylane.ai) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
diff --git a/_gsocproposals/2026/proposal_QMLHEP9.md b/_gsocproposals/2026/proposal_QMLHEP9.md new file mode 100644 index 00000000..2af62247 --- /dev/null +++ b/_gsocproposals/2026/proposal_QMLHEP9.md @@ -0,0 +1,68 @@ +--- +title: Quantum Diffusion Model for High Energy Physics +layout: gsoc_proposal +project: QMLHEP +year: 2026 +organization: + - Alabama + - Wisconsin + - TUM +--- + +## Description +The ambitious HL-LHC program will require enormous computing resources and datasets in the next two decades. New technologies are being sought after to replace the present computing infrastructure. A burning question is whether quantum computers can solve the ever growing demand of computing resources in High Energy Physics (HEP) in general and physics at LHC in particular. Our goal here is to explore and to demonstrate that Quantum Computing can be the new paradigm (Proof of Principle). +Discovery of new physics requires the identification of rare signals against immense backgrounds. Development of machine learning methods will greatly enhance our ability to achieve this objective. However, with this ever-growing volume of data in the near future, current machine learning algorithms will require large computing resources and excessive computing time to achieve good performance. Quantum Computing in Qubit platform, where qubits are used instead of bits in classical computers, has the potential to improve the time complexity of classical algorithms. + +With this project we seek to implement Quantum Machine Learning methods for LHC HEP analysis based on the Pennylane framework. This will enhance the ability of the HEP community to use Quantum Machine Learning methods. + + + +## Duration + +Total project length: 175 hours. 
+ +## Task ideas + * Implement a fully quantum diffusion model architecture + * Benchmark the trained model on selected tasks by employing a classical model (DDPM, DDIM, or a similar variant) + * Analyze scalability and computational complexity of the proposed model + + + + +## Expected results + * Trained quantum diffusion model + * Benchmark of the performance on a HEP dataset compared against a classical reference model + + + + + +## Requirements + * Strong background in Machine Learning & Deep Learning. + * Knowledge of Quantum Computing (VQAs, Quantum Optimization). + * Proficiency in Python & Pennylane. + * Ability to work independently on research projects. + +## Difficulty Level + * Advanced – requires expertise in Quantum ML and HEP data analysis. + +## Mentors + * [Rui Zhang](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Konstantin Matchev](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Tom Magorsh](mailto:ml4-sci@cern.ch) (TUM) + + +## Links + * [HL-LHC](https://hilumilhc.web.cern.ch) + * [LHC](https://home.cern/science/accelerators/large-hadron-collider) + * [Pennylane](https://pennylane.ai) + * [Paper 1](https://arxiv.org/pdf/2401.07049) + * [Paper 2](https://arxiv.org/pdf/2401.07039) + * [Blog Post](https://medium.com/@mashapotatoes/gsoc-quantum-diffusion-model-for-high-energy-physics-part-ii-6e693d625931) + * [Paper 3](https://openreview.net/pdf?id=vUQLzDAdqt) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
diff --git a/_gsocproposals/2026/proposal_SYMBA1.md b/_gsocproposals/2026/proposal_SYMBA1.md new file mode 100644 index 00000000..df434640 --- /dev/null +++ b/_gsocproposals/2026/proposal_SYMBA1.md @@ -0,0 +1,49 @@ +--- +title: Next-Generation Transformer Models for Symbolic Calculations of Squared Amplitudes in HEP +layout: gsoc_proposal +project: SYMBA +year: 2026 +organization: + - Alabama + - FSU + - QU + +--- + +## Description + +One of the most important physical quantities in particle physics is the cross section, or a probability that a particular process takes place in the interaction of elementary particles. Its measure provides a testable link between theory and experiment. It is obtained theoretically mainly by calculating the squared amplitude. The approach we use in this project is to treat the amplitude and squared amplitude as mathematical symbolic expressions and use language-translation models to map from the amplitude to squared-amplitude. This project will explore uses of more advanced techniques which could include but are not limited to Kolmogorov-Arnold Network layers in transformers, genetic algorithms and other evolutionary techniques, reinforcement learning. + +## Duration + +Total project length: 175/350 hours. + +## Task ideas and expected results + * Develop various transformer-based models on sequence-to-sequence tasks + * Benchmark different models on simulated physics datasets of various complexity and sequence lengths to find the best model + * Integrate with the SymbaHEP pipeline + +## Requirements +Significant experience with Transformer machine learning models in Python (preferably using pytorch). 
+ +## Difficulty Level +Advanced + + + +## Mentors + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Abdulhakim Alnuqaydan](mailto:ml4-sci@cern.ch) (Qassim University) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Harrison Prosper](mailto:ml4-sci@cern.ch) (Florida State University) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + +## Links + * [Paper 1](https://ml4physicalsciences.github.io/2023/files/NeurIPS_ML4PS_2023_183.pdf) + * [Paper 2](https://iopscience.iop.org/article/10.1088/2632-2153/acb2b2) + * [Poster 1](https://nips.cc/media/PosterPDFs/NeurIPS%202023/76219.png) + * [Blog post 1](https://medium.com/@neerajanandfirst/my-journey-to-google-summer-of-code-2023-with-ml4sci-8822ce64464a) diff --git a/_gsocproposals/2026/proposal_SYMBA2.md b/_gsocproposals/2026/proposal_SYMBA2.md new file mode 100644 index 00000000..e3b4164b --- /dev/null +++ b/_gsocproposals/2026/proposal_SYMBA2.md @@ -0,0 +1,50 @@ +--- +title: State-space models for squared amplitude calculation in high-energy physics +layout: gsoc_proposal +project: SYMBA +year: 2026 +organization: + - Alabama + - QU +--- + +## Description + +One of the most important physical quantities in particle physics is the cross section, or a probability that a particular process takes place in the interaction of elementary particles. Its measure provides a testable link between theory and experiment. It is obtained theoretically mainly by calculating the squared amplitude. In this project we will explore state-space models (SSMs) to map from amplitudes to squared amplitudes using sequence to sequence representations. + +## Duration + +Total project length: 175/350 hours. 
+ +## Task ideas and expected results + * Develop and deploy state-space models (SSM) for symbolic solutions of squared amplitude calculations + * Benchmark the SSM models against other transformer-based solutions + * Integrate with the SymbaHEP pipeline + +## Requirements +Significant experience with machine learning models in Python (preferably using PyTorch). + +## Difficulty Level +Advanced + + + + +## Mentors + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Abdulhakim Alnuqaydan](mailto:ml4-sci@cern.ch) (Qassim University) + * [Marco Knipfer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Victor Baules](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Dinesh Ramakrishnan](mailto:ml4-sci@cern.ch) (University of Alabama) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + +## Links + * [Paper 1](https://arxiv.org/abs/2312.00752) + * [Paper 2](https://ml4physicalsciences.github.io/2023/files/NeurIPS_ML4PS_2023_183.pdf) + * [Paper 3](https://iopscience.iop.org/article/10.1088/2632-2153/acb2b2) + * [Poster 1](https://nips.cc/media/PosterPDFs/NeurIPS%202023/76219.png) + * [Blog post 1](https://medium.com/@neerajanandfirst/my-journey-to-google-summer-of-code-2023-with-ml4sci-8822ce64464a) diff --git a/_gsocproposals/2026/proposal_SYMBA3.md b/_gsocproposals/2026/proposal_SYMBA3.md new file mode 100644 index 00000000..7ce1a682 --- /dev/null +++ b/_gsocproposals/2026/proposal_SYMBA3.md @@ -0,0 +1,50 @@ +--- +title: Transformer Models for Symbolic Regression +layout: gsoc_proposal +project: SYMBA +year: 2026 +organization: + - Alabama + - FSU + - QU +--- + +## Description + +Symbolic regression can be used to rapidly provide solutions to problems in science which may have large computational complexity or may even be intractable. 
It can be used to discover a symbolic expression describing data such as a physical law. Previous work has explored combinations of Transformer models with genetic algorithms or reinforcement learning. Future work on this project might extend those approaches but could also include explorations of alternative approaches such as incorporation of Kolmogorov-Arnold Layers or novel LLM-based approaches. As a concrete testbed for these new algorithms, the project will focus on predicting physical quantities, such as cross sections in high-energy physics, i.e. the probability that a particular process takes place in the interaction of elementary particles. Its measure provides a testable link between theory and experiment. It is obtained theoretically mainly by calculating the squared amplitude. + +## Duration + +Total project length: 175/350 hours. + +## Task ideas and expected results + * Develop symbolic regression models based on next-gen transformer architectures + * Benchmark these models on synthetic and high-energy physics datasets + +## Requirements +Significant experience with Transformer machine learning models in Python (preferably using PyTorch). + +## Difficulty Level +Intermediate + + + + +## Mentors + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Harrison Prosper](mailto:ml4-sci@cern.ch) (Florida State University) + * [Marco Knipfer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Dinesh Ramakrishnan](mailto:ml4-sci@cern.ch) (University of Alabama) + * [François Charton](mailto:ml4-sci@cern.ch) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
+ +## Links + * [Paper 1](https://ml4physicalsciences.github.io/2023/files/NeurIPS_ML4PS_2023_183.pdf) + * [Paper 2](https://iopscience.iop.org/article/10.1088/2632-2153/acb2b2) + * [Paper 3](https://ml4physicalsciences.github.io/2024/files/NeurIPS_ML4PS_2024_115.pdf) + * [Paper 4](https://ml4physicalsciences.github.io/2024/files/NeurIPS_ML4PS_2024_118.pdf) + * [Blog Post 1](https://medium.com/@aryamaanthakur/transformers-meet-evolution-a-hybrid-approach-to-symbolic-regression-final-progress-gsoc-0de041ac013d) diff --git a/_gsocproposals/2026/proposal_SYMBA4.md b/_gsocproposals/2026/proposal_SYMBA4.md new file mode 100644 index 00000000..eb39a394 --- /dev/null +++ b/_gsocproposals/2026/proposal_SYMBA4.md @@ -0,0 +1,48 @@ +--- +title: Titans for squared amplitude calculation +layout: gsoc_proposal +project: SYMBA +year: 2026 +organization: + - Alabama +--- + +## Description + +One of the most important physical quantities in particle physics is the cross section, or a probability that a particular process takes place in the interaction of elementary particles. Its measure provides a testable link between theory and experiment. It is obtained theoretically mainly by calculating the squared amplitude. In this project we will explore Google’s novel TITANS architecture to map from amplitudes to squared amplitudes using sequence to sequence representations. + +## Duration + +Total project length: 175/350 hours. + +## Task ideas and expected results + * Develop Titans architecture for use at scale for squared amplitude calculation + * Analyze performance of Titans architecture relative to baselines + * Integrate Titans architecture into existing SYMBAHEP code pipelines + + +## Requirements +Significant experience with developing foundational models in Python (preferably using PyTorch). 
+ +## Difficulty Level +Advanced + + + + +## Mentors + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Marco Knipfer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Victor Baules](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Dinesh Ramakrishnan](mailto:ml4-sci@cern.ch) (University of Alabama) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + +## Links + * [Paper 1](https://iopscience.iop.org/article/10.1088/2632-2153/acb2b2) + * [Paper 2](https://arxiv.org/html/2501.00663v1) + * [Paper 3](https://ml4physicalsciences.github.io/2024/files/NeurIPS_ML4PS_2024_118.pdf) diff --git a/_gsocproposals/2026/proposal_SYMBA5.md b/_gsocproposals/2026/proposal_SYMBA5.md new file mode 100644 index 00000000..78b03a9c --- /dev/null +++ b/_gsocproposals/2026/proposal_SYMBA5.md @@ -0,0 +1,49 @@ +--- +title: Evolutionary and Transformer Models for Symbolic Regression +layout: gsoc_proposal +project: SYMBA +year: 2026 +organization: + - Alabama + - QU +--- + +## Description + +Symbolic regression can be used to rapidly provide solutions to problems in science which may have large computational complexity or may even be intractable. It can be used to discover a symbolic expression describing data such as a physical law. Current directions in symbolic regression focus either on evolutionary/genetic programming approaches or alternatively transformer based solutions. This project will explore a combination of these ideas towards a new tool for symbolic regression that can be used to solve many problems in science. 
+As a concrete testbed for these new algorithms, the project will focus on predicting physical quantities, such as cross sections in high-energy physics, i.e. the probability that a particular process takes place in the interaction of elementary particles. Its measure provides a testable link between theory and experiment. It is obtained theoretically mainly by calculating the squared amplitude. + +## Duration + +Total project length: 175/350 hours. + +## Task ideas and expected results + * Extend the previous year's work on genetic algorithms and transformers + * Benchmark these models on synthetic and high-energy physics datasets + +## Requirements +Significant experience with Transformer machine learning models in Python (preferably using PyTorch). + +## Difficulty Level +Advanced + + + + +## Mentors + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Abdulhakim Alnuqaydan](mailto:ml4-sci@cern.ch) (Qassim University) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Marco Knipfer](mailto:ml4-sci@cern.ch) (University of Alabama) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. 
+ +## Links + * [Paper 1](https://arxiv.org/abs/2204.10532) + * [Paper 2](https://arxiv.org/abs/1901.11117) + * [Paper 3](https://iopscience.iop.org/article/10.1088/2632-2153/acb2b2) + * [Paper 4](https://ml4physicalsciences.github.io/2024/files/NeurIPS_ML4PS_2024_115.pdf) + * [Paper 5](https://ml4physicalsciences.github.io/2024/files/NeurIPS_ML4PS_2024_118.pdf) diff --git a/_gsocproposals/2026/proposal_SYMBA6.md b/_gsocproposals/2026/proposal_SYMBA6.md new file mode 100644 index 00000000..655a57b9 --- /dev/null +++ b/_gsocproposals/2026/proposal_SYMBA6.md @@ -0,0 +1,56 @@ +--- +title: Symbolic empirical representation of squared amplitudes in high-energy physics +layout: gsoc_proposal +project: SYMBA +year: 2026 +organization: + - Alabama + - FSU + - QU +--- + + +## Description + + +One of the most important physical quantities in particle physics is the cross section, or a probability that a particular process takes place in the interaction of elementary particles. Its measure provides a testable link between theory and experiment. It is obtained theoretically mainly by calculating the squared amplitude (matrix M). + + +## Duration + + +Total project length: 175/350 hours. + + +## Task ideas and expected results + * Explore physics-informed ideas for improving data representations, physics-aware models, and physics simulations for squared amplitude calculation + * Apply symbolic machine learning techniques to predict the squared amplitudes and cross section for high-energy physics + +## Requirements + * Python, C++ and some experience in Machine Learning sequence models. 
+ * Knowledge of physics and linear algebra is desired + + + + + + +## Difficulty Level +Intermediate/Advanced + + +## Mentors + * [Abdulhakim Alnuqaydan](mailto:ml4-sci@cern.ch) (Qassim University) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Harrison Prosper](mailto:ml4-sci@cern.ch) (Florida State University) + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + +## Links + * [Paper 1](https://iopscience.iop.org/article/10.1088/2632-2153/acb2b2) + * [Paper 2](https://arxiv.org/html/2501.00663v1) + * [Paper 3](https://ml4physicalsciences.github.io/2024/files/NeurIPS_ML4PS_2024_118.pdf) \ No newline at end of file diff --git a/_gsocproposals/2026/proposal_SYMBA7.md b/_gsocproposals/2026/proposal_SYMBA7.md new file mode 100644 index 00000000..1364c846 --- /dev/null +++ b/_gsocproposals/2026/proposal_SYMBA7.md @@ -0,0 +1,50 @@ +--- +title: Foundation models for symbolic regression tasks +layout: gsoc_proposal +project: SYMBA +year: 2026 +organization: + - Alabama + - FSU + - QU +--- + + +## Description +Symbolic regression can be used to rapidly provide solutions to problems in science which may have large computational complexity or may even be intractable. It can be used to discover a symbolic expression describing data such as a physical law. Many approaches have been explored but this project will focus on building a foundation model which incorporates knowledge about the task of symbolic regression and common data representations for physical systems. + + +## Duration + + +Total project length: 175/350 hours. 
+ + +## Task ideas and expected results + * Explore physics-informed ideas for improving data representations, physics-aware models, and physics simulations for squared amplitude calculation + * Apply symbolic machine learning techniques to predict the squared amplitudes and cross section for high-energy physics + +## Requirements + * Python, C++ and some experience in Machine Learning sequence models. + * Some knowledge of physics and extensive knowledge of mathematics preferred. + + + + + + +## Difficulty Level +Advanced + + +## Mentors + * [Abdulhakim Alnuqaydan](mailto:ml4-sci@cern.ch) (Qassim University) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Harrison Prosper](mailto:ml4-sci@cern.ch) (Florida State University) + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. diff --git a/_gsocproposals/2026/proposal_SYMMETRY1.md b/_gsocproposals/2026/proposal_SYMMETRY1.md new file mode 100644 index 00000000..ddbce4a7 --- /dev/null +++ b/_gsocproposals/2026/proposal_SYMMETRY1.md @@ -0,0 +1,59 @@ +--- +title: Semi-supervised Symmetry Discovery +layout: gsoc_proposal +project: SYMMETRY +year: 2026 +organization: + - Alabama + - NISER + +--- + +## Description +Recent success in the domain of unsupervised and semi-supervised learning has lately been a pivotal factor in the development of physics-aware and symmetry-aware machine learning techniques, where a model learns the symmetry of a dataset as a meta-task and ends up learning the physics through it. + +Although most of the symmetries that we work with for SM physics are well defined and formulated, they can be well interpreted in the 4-vector or 4-momentum basis.
With a change of representation, the symmetries become elusive and difficult to write and work with. This calls for machine learning techniques that can learn the representation of the given symmetry through the means of a conserved quantity for a given abstract representation space. + +Learning these symmetries not only makes us more prepared to deal with the physics constraints in these abstract spaces and coordinates but also makes us able to build neural networks that are invariant to these symmetries. Such neural networks, as seen in the existing literature, are more robust, stable, interpretable, and data-efficient. + +This project will focus on ways to learn the symmetries using semi-supervised approaches for the raw CMS calorimetric data stream. + +## Duration +Total project length: 175/350 hours. + +## Task ideas + * Build an understanding of the symmetries present and their nature of representation on the CMS detector space [link](https://arxiv.org/abs/2104.09459). + * Develop a supervised model to learn some of the symmetries given the conserved quantity and nature of augmentations. + * Extend the supervised approach to a semi-supervised setup to discover symmetries without using the augmentation space [link](https://arxiv.org/abs/2302.00236). + * [Advanced Step] Use the discovered symmetries to build a physics-aware neural network. + +## Expected results + * Discover hidden symmetries present in the CMS dataset. + * Benchmark the models against previous work in terms of data efficiency and invariance with respect to symmetry operations.
+ +## Difficulty level +Advanced + +## Requirements +* Proficiency in C++, Python +* Experience with PyTorch and TensorFlow +* Previous experience in Deep Learning + + + +## Mentors + * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Ruchi Chudasama](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Samuel Campbell](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Alex Roman](mailto:ml4-sci@cern.ch) (University of Alabama) + +Please **DO NOT** contact mentors directly by email. Instead, please email [ml4-sci@cern.ch](mailto:ml4-sci@cern.ch) with Project Title and **include your CV** and **test results**. The mentors will then get in touch with you. + +## Links + * [Paper 1](https://arxiv.org/abs/2302.00236) + * [Paper 2](https://arxiv.org/abs/2104.09459) diff --git a/_layouts/main.html b/_layouts/main.html index 933872cc..c1d7e0fd 100644 --- a/_layouts/main.html +++ b/_layouts/main.html @@ -60,28 +60,29 @@

Meetings

Full list of past meetings »

--> +

For Students: The 2026 GSoC term has not yet started. Please check this website for more information on February 19th.

+

-

ML4SCI in GSoC 2025

+

ML4SCI in GSoC 2026

- The ML4Sci open source organization plans to participate in the 2025 Google Summer of Code. If you are a student interested in our projects please check our ideas page. + The ML4Sci open source organization plans to participate in the 2026 Google Summer of Code. If you are a student interested in our projects please check our ideas page. ML4Sci is an umbrella organization that welcomes other projects and organizations related to machine-learning for science. Please contact the admins at ml4-sci@cern.ch if you are interested in participating as a project. Our contributors publish scientific articles in peer-reviewed journals.

GSoC-logo -

Please take a look at our GSoC Page for more details.

+

Please take a look at our GSoC Page for more details.

-

If you are interested in AI in Humanities and Arts please visit our sister GSoC Organization HumanAI planning to take part in GSoC 2025.

+

If you are interested in AI in Humanities and Arts please visit our sister GSoC Organization HumanAI planning to take part in GSoC 2026.

If you are interested in our activities please join our announcements mailing list. To join, you will need to create a CERN lightweight account.

You can also find us on Gitter.

-

Latest update: evaluation test release date: 02/27.

Organization administrators:
diff --git a/gsoc/2026/mentors.md b/gsoc/2026/mentors.md new file mode 100644 index 00000000..bd03720b --- /dev/null +++ b/gsoc/2026/mentors.md @@ -0,0 +1,86 @@ +--- +title: Summary of Mentors GSoC 2026 +layout: plain +--- + +## Full Mentor List (Name, Email, Org) + + * [Sergei Gleyzer](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Emanuele Usai](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Richard S. Miller](mailto:ml4-sci@cern.ch) (Johns Hopkins University Applied Physics Laboratory) + * [James P. Mastandrea](mailto:ml4-sci@cern.ch) (Johns Hopkins University Applied Physics Laboratory) + * [Patrick Peplowski](mailto:ml4-sci@cern.ch) (Johns Hopkins University Applied Physics Laboratory) + * [Aimee Hungerford](mailto:ml4-sci@cern.ch) (Los Alamos National Laboratory) + * [Pranath Reddy](mailto:ml4-sci@cern.ch) (BITS Pilani Hyderabad) + * [Michael Toomey](mailto:ml4-sci@cern.ch) (Brown University) + * [Anna Parul](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Sourav Raha](mailto:ml4-sci@cern.ch) (University of Florida) + * [Stephon Alexander](mailto:ml4-sci@cern.ch) (Brown University) + * [Brandon Ames](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Tyler Trupke](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Suzanne Rozenzweig](mailto:ml4-sci@cern.ch) (University of Florida) + * [Efe Yigibasi](mailto:ml4-sci@cern.ch) (University of Florida) + * [Darin Acosta](mailto:ml4-sci@cern.ch) (University of Florida) + * [Ali Hariri](mailto:ml4-sci@cern.ch) (American University Beirut) + * [Shravan Chaudhari](mailto:ml4-sci@cern.ch) (New York University) + * [Michael Andrews](mailto:ml4-sci@cern.ch) (Carnegie Mellon University) + * [Harrison Prosper](mailto:ml4-sci@cern.ch) (Florida State University) + * [Michelle Kuchera](mailto:ml4-sci@cern.ch) (Davidson College) + * [Sitong An](mailto:ml4-sci@cern.ch) (Carnegie Mellon University and CERN) + * [Abdulhakim Alnuqaydan](mailto:ml4-sci@cern.ch) (Qassim University) + * [Rui
Zhang](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Shaojun Sun](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Chen Zhou](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Wen Guan](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Alkaid Cheng](mailto:ml4-sci@cern.ch) (University of Wisconsin-Madison) + * [Jeremy Bailin](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Jacob Morgan](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Jianghao Huyan](mailto:ml4-sci@cern.ch) (University of South Carolina) + * [Varsha Kulkarni](mailto:ml4-sci@cern.ch) (University of South Carolina) + * [Jason Terry](mailto:ml4-sci@cern.ch) (University of Georgia) + * [Stephen Carr](mailto:ml4-sci@cern.ch) (Brown University) + * [Vesna Mitrovic](mailto:ml4-sci@cern.ch) (Brown University) + * [Chandrasekhar Ramanathan](mailto:ml4-sci@cern.ch) (Dartmouth College) + * [Brad Marston](mailto:ml4-sci@cern.ch) (Brown University) + * [Charles Snider](mailto:ml4-sci@cern.ch) (Brown University) + * [Steven Tobias](mailto:ml4-sci@cern.ch) (University of Leeds) + * [Chenyu Zhang](mailto:ml4-sci@cern.ch) (Brown University) + * [Jeremy Quijano](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Sanaz Kiyadeh](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Yurii Halychanskyi](mailto:ml4-sci@cern.ch) (University of Washington) + * [Sinan Gençoğlu](mailto:ml4-sci@cern.ch) (Middle East Technical University) + * [Amal Saif](mailto:ml4-sci@cern.ch) (Princess Sumaya University for Technology) + * [Giorgos Pipilis](mailto:ml4-sci@cern.ch) (NTUA) + * [Ruchi Chudasama](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Tom Magorsh](mailto:ml4-sci@cern.ch) (TUM) + * [Eric Reinhardt](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Samuel Campbell](mailto:ml4-sci@cern.ch) (University of Alabama) + * [Saranga Mahanta](mailto:ml4-sci@cern.ch) (Institut Polytechnique de Paris) + * [Kartik Sachdev](mailto:ml4-sci@cern.ch) (RWTH
Aachen) + * [KC Kong](mailto:ml4-sci@cern.ch) (Kansas) + * [Konstantin Matchev](mailto:ml4-sci@cern.ch) (Florida) + * [Katia Matcheva](mailto:ml4-sci@cern.ch) (Florida) + * [Tom Magorsh](mailto:ml4-sci@cern.ch) (TUM) + * [Diptarko Choudhury](mailto:ml4-sci@cern.ch) (NISER) + * [Marco Knipfer](mailto:ml4-sci@cern.ch) (University of Erlangen-Nuremberg) + * [Neeraj Anand](mailto:ml4-sci@cern.ch) (IIT Dhanbad) + + + + + + + + diff --git a/gsoc/2026/summary.md b/gsoc/2026/summary.md new file mode 100644 index 00000000..37444cda --- /dev/null +++ b/gsoc/2026/summary.md @@ -0,0 +1,17 @@ +--- +title: Summary of GSoC 2026 Projects and Supervisors +layout: plain +year: 2026 +--- + +## Full List of Proposals + +{:.table .table-hover .table-striped} +{% assign sorted_proposals = site.gsocproposals | sort: 'title' %} +{% for proposal in sorted_proposals %}{% capture u_proposal_org %}{{ organization | upcase }}{% endcapture %} +{%- assign strings = proposal.url | split: '/' -%} +{%- assign proposal_year = strings[2] | plus: 0 -%} +{%- if proposal_year == page.year %} +| [ {{ proposal.title }} ]( {{ proposal.url }} ) | +{%- endif -%} +{% endfor %}