mmistakes · aginai · Nov 11, 2023 · Nov 11, 2023 · Nov 11, 2023 · Dec 10, 2023
diff --git a/README.md b/README.md
diff --git a/_config.yml b/_config.yml
@@ -12,15 +12,15 @@
 
 # theme                  : "minimal-mistakes-jekyll"
 # remote_theme           : "mmistakes/minimal-mistakes"
-minimal_mistakes_skin    : "default" # "air", "aqua", "contrast", "dark", "dirt", "neon", "mint", "plum", "sunrise"
+minimal_mistakes_skin    : "dark" # "air", "aqua", "contrast", "dark", "dirt", "neon", "mint", "plum", "sunrise"
 
 # Site Settings
 locale                   : "en-US"
-title                    : "Site Title"
+title                    : "Home"
 title_separator          : "-"
 subtitle                 : # site tagline that appears below site title in masthead
-name                     : "Your Name"
-description              : "An amazing website."
+name                     : "Abhijeet Ghawade"
+description              : "Data Analyst at SLB"
 url                      : # the base hostname & protocol for your site e.g. "https://mmistakes.github.io"
 baseurl                  : # the subpath of your site, e.g. "/blog"
 repository               : # GitHub username/repo-name e.g. "mmistakes/minimal-mistakes"
@@ -104,18 +104,18 @@ analytics:
 
 # Site Author
 author:
-  name             : "Your Name"
+  name             : "Abhijeet Ghawade"
   avatar           : # path of avatar image, e.g. "/assets/images/bio-photo.jpg"
-  bio              : "I am an **amazing** person."
-  location         : "Somewhere"
+  bio              : "Data Analyst at SLB"
+  location         : "Pune, India"
   email            :
   links:
     - label: "Email"
       icon: "fas fa-fw fa-envelope-square"
-      # url: "mailto:your.name@email.com"
+      url: "mailto:abhijeet.ghawade12@gmail.com"
     - label: "Website"
       icon: "fas fa-fw fa-link"
-      # url: "https://your-website.com"
+      url: "https://aginai.github.io"
     - label: "Twitter"
       icon: "fab fa-fw fa-twitter-square"
       # url: "https://twitter.com/"
@@ -124,7 +124,7 @@ author:
       # url: "https://facebook.com/"
     - label: "GitHub"
       icon: "fab fa-fw fa-github"
-      # url: "https://github.com/"
+      url: "https://github.com/abhijeetg12"
     - label: "Instagram"
       icon: "fab fa-fw fa-instagram"
       # url: "https://instagram.com/"
@@ -156,6 +156,7 @@ footer:
 include:
   - .htaccess
   - _pages
+  - _posts
 exclude:
   - "*.sublime-project"
   - "*.sublime-workspace"

diff --git a/_data/navigation.yml b/_data/navigation.yml
@@ -1,12 +1,12 @@
 # main links
 main:
-  - title: "Quick-Start Guide"
-    url: https://mmistakes.github.io/minimal-mistakes/docs/quick-start-guide/
-  # - title: "About"
-  #   url: https://mmistakes.github.io/minimal-mistakes/about/
-  # - title: "Sample Posts"
-  #   url: /year-archive/
+  - title: "Get to know me"
+    url: /about/
+  - title: "Personal"
+    url: /about/
+  - title: "Blogs"
+    url: /posts/
   # - title: "Sample Collections"
   #   url: /collection-archive/
   # - title: "Sitemap"
-  #   url: /sitemap/
+  #   url: /sitemap/    
diff --git a/_pages/about.md b/_pages/about.md
@@ -0,0 +1,26 @@
+---
+title: About Me
+permalink: /about/
+image : prof_pic.jpeg
+---
+
+<!-- # About Me
+
+Hello, I'm [Your Name]. Welcome to my personal space on the internet.
+
+
+## Contact
+
+Feel free to reach out to me on [social media](#) or via email at [[email protected]]. -->
+
+## About me 
+![prof_pic](/assets/splash/cover.jpg)
+Hello!
+Thanks for visiting my personal website!
+I am Abhijeet, a Data Analyst at SLB (Schlumberger), where I work primarily on data analysis.
+
+Prior to joining SLB, I graduated from the Indian Institute of Technology, Madras with a Bachelor's and a Master's degree in Electrical Engineering.
+I worked on my final-semester project with Professor Balaram Ravindran on option discovery methods for reinforcement learning.
+I spent a semester at Carnegie Mellon University, in the Advanced Agents Robotics Technology Lab, in 2019-20 for an undergraduate internship.
+My areas of interest are deep learning, large language models, and reinforcement learning.
+
diff --git a/_pages/paper.md b/_pages/paper.md
@@ -0,0 +1,6 @@
+---
+title: SPE paper
+nav: true
+nav_order: 4
+cv_pdf: spe-205877.pdf
+---
diff --git a/_pages/projects.md b/_pages/projects.md
@@ -0,0 +1 @@
+## Project
diff --git a/_posts/2023-12-11-spe-paper.md b/_posts/2023-12-11-spe-paper.md
@@ -0,0 +1,25 @@
+---
+title:  "Research Publication"
+last_modified_at: 2018-03-20T16:00:58-04:00
+categories: 
+  - Jekyll
+tags:
+  - update
+toc: true
+toc_label: "Getting Started"
+---
+## Cognitive HSE Risk Prediction and Notification Tool Based on Natural Language Processing
+## Abstract
+The focus of this work is on developing a cognitive tool that predicts the most frequent HSE hazards with
+the highest potential severity levels. The tool identifies these risks using a natural language processing
+algorithm on HSE leading and lagging indicator reports submitted to an oilfield services company’s global
+HSE reporting system. The purpose of the tool is to prioritize proactive actions and provide focus to raise
+workforce awareness.
+A natural language processing algorithm was developed to identify priority HSE risks based on
+potential severity levels and frequency of occurrence. The algorithm uses vectorization, compression, and
+clustering methods to categorize the risks by potential severity and frequency using a formulated risk index
+methodology. In the pilot study, a user interface was developed to configure the frequency and the number
+of the prioritized HSE risks that are to be communicated from the tool to those employees who opted to
+receive the information in a given location.
+
+<a href="/assets/_attachments/spe-205877-ms.pdf" target="_blank"> Link to the paper </a>
diff --git a/_posts/2024-10-08-genai-chatbot.md b/_posts/2024-10-08-genai-chatbot.md
@@ -0,0 +1,136 @@
+# Building a RAG Search System with Azure AI and Streamlit
+
+In this post, I'll walk you through how I built a Retrieval-Augmented Generation (RAG) search system using Azure AI services and Streamlit. This project combines the power of Azure Cognitive Search with Large Language Models to create an intelligent chatbot that can answer questions based on your own knowledge base.
+
+## What is RAG?
+
+Before diving into the implementation details, let's understand what RAG is. Retrieval-Augmented Generation is a technique that enhances Large Language Models by allowing them to access and use external knowledge. Instead of relying solely on the model's trained knowledge, RAG systems first retrieve relevant information from a curated knowledge base and then use that information to generate more accurate and contextual responses.
+
+## System Architecture
+
+The system consists of several key components:
+
+1. **Document Processing Pipeline**: Handles PDF documents by:
+   - Splitting them into pages
+   - Extracting text (including tables)
+   - Chunking content into manageable sections
+   - Uploading to Azure Blob Storage
+
+2. **Azure Cognitive Search**: Indexes and stores the processed documents for efficient retrieval
+
+3. **Streamlit Interface**: Provides a user-friendly chat interface with:
+   - Persona selection
+   - Conversation history
+   - Dynamic response generation
+
+## Key Implementation Features
+
+### Document Processing
+
+One of the most interesting aspects of this project is how it handles document processing. The system can process PDF documents intelligently:
+
+```python
+def get_document_text(filename):
+    if localpdfparser:
+        reader = PdfReader(filename)
+        pages = reader.pages
+        for page_num, p in enumerate(pages):
+            page_text = p.extract_text()
+            page_map.append((page_num, offset, page_text))
+    else:
+        form_recognizer_client = DocumentAnalysisClient(...)
+        # Use Azure Form Recognizer for advanced document analysis
+```
+
+The system can even handle complex tables by converting them to HTML format:
+
+```python
+def table_to_html(table):
+    table_html = "<table>"
+    rows = [sorted([cell for cell in table.cells if cell.row_index == i], 
+            key=lambda cell: cell.column_index) 
+            for i in range(table.row_count)]
+    # Process table structure...
+    return table_html
+```
+
+### Smart Text Chunking
+
+To optimize search performance, the system implements intelligent text chunking:
+
+1. Maintains context across chunks with overlapping sections
+2. Respects sentence boundaries
+3. Handles special cases like tables spanning multiple chunks
+
+### Azure Search Integration
+
+The search index is designed to support semantic search capabilities:
+
+```python
+search_index = SearchIndex(
+    name=index,
+    fields=[
+        SimpleField(name="id", type="Edm.String", key=True),
+        SearchableField(name="content", type="Edm.String", analyzer_name="en.microsoft"),
+        SimpleField(name="category", type="Edm.String", filterable=True, facetable=True),
+        # Additional fields...
+    ],
+    semantic_settings=SemanticSettings(...)
+)
+```
+
+## The User Experience
+
+The Streamlit interface provides a clean, chat-like experience where users can:
+
+1. Select different personas (e.g., TLM Manager, PSD Manager)
+2. Ask questions naturally
+3. See conversation history
+4. Get responses with source references
+
+## Deployment and Configuration
+
+The system is designed to be easily deployable with minimal configuration:
+
+1. Configure Azure services in `secrets.toml`:
+   ```toml
+   [default]
+   searchservice = "your-search-service-name"
+   searchkey = "your-search-service-admin-api-key"
+   index = "your-index-name"
+   ```
+
+2. Run with a simple command:
+   ```bash
+   streamlit run app.py
+   ```
+
+## Technical Considerations
+
+When building this system, I had to address several technical challenges:
+
+1. **Document Processing**: Handling various PDF formats and table structures
+2. **Chunking Strategy**: Balancing chunk size with context preservation
+3. **Search Relevance**: Tuning search parameters for optimal results
+4. **Response Generation**: Creating contextual prompts for the LLM
+
+## Future Improvements
+
+Some potential enhancements I'm considering:
+
+1. Multi-language support
+2. Advanced document preprocessing options
+3. Custom embeddings for improved search relevance
+4. Real-time document updates
+
+## Conclusion
+
+This RAG search system demonstrates how to combine Azure AI services with modern UI frameworks to create a powerful, user-friendly search experience. The modular architecture makes it easy to extend and customize for different use cases.
+
+Feel free to check out the [GitHub repository](https://github.com/your-username/rag-search-azure-ai) for the complete code and documentation.
+
+## Resources
+
+- [Azure Cognitive Search Documentation](https://docs.microsoft.com/en-us/azure/search/)
+- [Streamlit Documentation](https://docs.streamlit.io/)
+- [Python PDF Processing](https://pypdf.readthedocs.io/)