From 174777396a3f01c2039214b8a7f7901e71042145 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20Ke=C3=9Fler?= <git@maximilian-kessler.de>
Date: Sat, 4 Nov 2023 17:12:18 +0100
Subject: [PATCH] initial commit

---
 Make.config |   5 ++
 Makefile    | 131 ++++++++++++++++++++++++++++++++
 README.md   |  34 +++++++++
 example.cpp |  35 +++++++++
 graph.cpp   | 181 ++++++++++++++++++++++++++++++++++++++++++++
 graph.hpp   | 211 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 597 insertions(+)
 create mode 100644 Make.config
 create mode 100644 Makefile
 create mode 100644 README.md
 create mode 100644 example.cpp
 create mode 100644 graph.cpp
 create mode 100644 graph.hpp

diff --git a/Make.config b/Make.config
new file mode 100644
index 0000000..3a18521
--- /dev/null
+++ b/Make.config
@@ -0,0 +1,5 @@
+CXX=clang++
+CXX_STD_FLAGS=-std=c++20
+CC=clang
+C_STD_FLAGS=-std=c99
+LINK_FLAGS=
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..ea73eeb
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,131 @@
+# Configuration (File extensions, tools to be used, etc)
+
+OPTLIBEXT=-O
+DEBUGLIBEXT=-g
+LIBEXTS=$(OPTLIBEXT) $(DEBUGLIBEXT)
+
+ifeq "$(MAKECMDGOALS)" "opt"
+   LIBEXT=$(OPTLIBEXT)
+   OPTFLAGS=$(OPTFLAGS_OPT)
+else
+   LIBEXT=$(DEBUGLIBEXT)
+   OPTFLAGS=$(OPTFLAGS_DEBUG)
+endif
+
+CXX=clang++
+CC=clang
+LN=ln
+RM=rm
+MKDIR=mkdir
+TOUCH=touch
+DIRNAME=dirname
+
+OPTFLAGS_OPT=-O3
+OPTFLAGS_DEBUG=-O0 -g
+# flags used for linking and compilation
+FLAGS=#
+# These flags are for compilation (not linking) only
+# -MMD auto-generates .d files in Make format when .o files are created
+COMPILE_FLAGS=-MMD -Werror -Wall -Wextra -pedantic -pipe $(OPTFLAGS)
+# Flags for linking only. 
+LINK_FLAGS=-fuse-ld=gold
+# C / C++ specific compilation flags
+CXX_STD_FLAGS=-std=c++20
+CXX_COMPILE_FLAGS=$(CXX_STD_FLAGS)
+C_STD_FLAGS=-std=c99
+C_COMPILE_FLAGS=$(C_STD_FLAGS)
+
+CXX_SRC_EXTS=cpp C CPP
+C_SRC_EXTS=c
+
+SRC_EXTS=$(CXX_SRC_EXTS) $(C_SRC_EXTS)
+
+BUILDDIR=build
+DIRECTORIES=$(BUILDDIR)
+EXECUTABLE_REL=main
+LINK_NAME=$(BUILDDIR)/$(EXECUTABLE_REL)
+EXECUTABLE=$(LINK_NAME)$(LIBEXT)
+
+# Collect files to be compiled
+
+CXX_SRCS=$(foreach ext, $(CXX_SRC_EXTS), $(shell find . -name "*.$(ext)"))
+C_SRCS=$(foreach ext, $(C_SRC_EXTS), $(shell find . -name "*.$(ext)"))
+SRCS=$(CXX_SRCS) $(C_SRCS)
+OBJS=$(foreach ext, $(SRC_EXTS), $(patsubst %.$(ext), $(BUILDDIR)/%$(LIBEXT).o, $(filter %.$(ext), $(SRCS))))
+
+# If there is at least one C++ source use the c++ compiler CXX to link.
+LINKER=$(CC) $(FLAGS) $(LINK_FLAGS)
+ifneq "$(strip $(CXX_SRCS))" ""
+LINKER=$(CXX) $(FLAGS) $(LINK_FLAGS)
+endif
+
+MAKECONFIG=Make.config
+# The set of Make-files influencing the build, hence everything depends on them
+MAKE_DEPENDENCIES=Makefile $(MAKECONFIG)
+
+DEFAULT_TARGET=debug
+
+# Include Make.config to modify variables above
+-include $(MAKECONFIG)
+
+# The supported meta targets, dependencies
+.PHONY: clean debug opt default check
+default: $(DEFAULT_TARGET)
+clean debug opt: check
+
+opt debug: $(EXECUTABLE)
+
+$(OBJS) : $(MAKE_DEPENDENCIES) | $(BUILDDIR)
+
+# include autogenerated .d files
+-include $(OBJS:.o=.d)
+
+# Rules
+
+define CXX_TO_OBJ
+	$(CXX) $(FLAGS) $(CXX_COMPILE_FLAGS) $(COMPILE_FLAGS) $< -c -o $@
+endef
+
+define C_TO_OBJ
+	$(CC) $(FLAGS) $(C_COMPILE_FLAGS) $(COMPILE_FLAGS) $< -c -o $@
+endef
+
+define MK_TARGETDIR
+	@$(MKDIR) -p $$($(DIRNAME) $@)
+endef
+
+$(BUILDDIR)/%$(LIBEXT).o: %.cpp
+	$(MK_TARGETDIR)
+	$(CXX_TO_OBJ)
+
+$(BUILDDIR)/%$(LIBEXT).o: %.C
+	$(MK_TARGETDIR)
+	$(CXX_TO_OBJ)
+
+$(BUILDDIR)/%$(LIBEXT).o: %.CPP
+	$(MK_TARGETDIR)
+	$(CXX_TO_OBJ)
+
+$(BUILDDIR)/%$(LIBEXT).o: %.c
+	$(MK_TARGETDIR)
+	$(C_TO_OBJ)
+
+$(EXECUTABLE):  $(OBJS) | $(BUILDDIR)
+	$(LINKER) $(OBJS) -o $@
+
+# Create link in opt/debug target
+opt debug:
+	$(LN) -sf $(EXECUTABLE_REL)$(LIBEXT) $(LINK_NAME)
+
+check: $(BUILDDIR)
+	@forbidden=$$(find $(BUILDDIR) -not -type d $(foreach filext, .o .d, -not -name "*$(filext)") $(foreach libext, $(LIBEXTS) "", -not -name "$(EXECUTABLE_REL)$(libext)")); \
+	if [ "$${forbidden}x" != "x" ]; then echo "Error: $(BUILDDIR) contains non-autogenerated files $${forbidden}"; exit 1; fi
+
+$(DIRECTORIES):
+	@$(MKDIR) $@
+
+$(MAKECONFIG):
+	@$(TOUCH) $@
+
+clean:
+	@$(RM) -rf $(BUILDDIR)
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..37f2825
--- /dev/null
+++ b/README.md
@@ -0,0 +1,34 @@
+# Sample Code
+
+## Graph class
+`graph.hpp` and `graph.cpp` contain a simple class to model unweighted undirected graphs that you may use if you wish.
+For convenience, the graph already supports input and output from and to the DIMACS format.
+
+## Main routine
+`example.cpp` contains a toy `main` routine that, for demonstration purposes,
+reads in a graph in DIMACS format, greedily removes edges until
+every node is incident to at most one edge and outputs the result to stdout.
+
+## Makefiles
+This code also contains a Makefile that you can use for compilation if you prefer it to a manual compilation command (but you don't have to).
+In the Make.config file, you can change e.g. the c++ compiler which should be used (`CXX`).
+In order to use the make setup, open up a terminal and navigate to this folder.
+You can write `make debug` in order to create an executable in which the compiler stayed close
+to your code and generated debug symbols which can e.g. be used in the gnu debugger gdb.
+You can also write `make opt` in order to create an executable in which the compiler was
+allowed to do a lot of optimization as long as the result stays the same.
+This executable is much faster, but not as useful if you are still testing
+whether your program works correctly, or investigating why it does not.
+Both variants use all the error flags used in the problem specification,
+meaning the compilation will fail if there are any warnings.
+This is because otherwise one might miss important warnings,
+which can save a lot of time one would otherwise spend debugging!
+Finally you can write `make clean` in order to remove everything generated when building
+one of the other make targets, including the executables and the output directory.
+
+You do not need to read the Makefile itself, but you can if you want to.
+It automatically compiles all files ending with `.cpp`, `.C` and `.CPP` using the C++ compiler specified as `CXX` in the Make.config,
+and compiles all files ending with `.c` using the C compiler specified as `CC` in the same file.
+Finally it links everything together into a binary which is referenced by the symlink `build/main`.
+Note that exactly one of your source files (`.cpp`, `.C`, `.CPP` or `.c`) should contain a main function!
+
diff --git a/example.cpp b/example.cpp
new file mode 100644
index 0000000..ca64077
--- /dev/null
+++ b/example.cpp
@@ -0,0 +1,35 @@
+#include <iostream> // For writing to the standard output.
+#include <fstream> // For reading input files.
+
+#include "graph.hpp"
+
+// Reads the DIMACS graph named by the sole argument and prints a greedy matching to stdout.
+int main(int argc, char** argv)
+{
+   if (argc != 2)
+   {
+      std::cerr << "Expected one argument, found " << argc - 1 << std::endl;
+      return EXIT_FAILURE; // return 1 would do the same, but is way too easy to mix up!
+   }
+   std::ifstream input_file_graph{argv[1]}; // read-only; std::fstream would also demand write access
+   if (!input_file_graph)
+   {
+      std::cerr << "Could not open input file " << argv[1] << std::endl;
+      return EXIT_FAILURE;
+   }
+   ED::Graph const graph = ED::Graph::read_dimacs(input_file_graph);
+   ED::Graph greedy_matching_as_graph{graph.num_nodes()};
+   for (ED::NodeId node_id = 0; node_id < graph.num_nodes(); ++node_id)
+   {
+      if (!greedy_matching_as_graph.node(node_id).neighbors().empty()) { continue; } // already matched
+      for (ED::NodeId neighbor_id : graph.node(node_id).neighbors())
+      {
+         if (!greedy_matching_as_graph.node(neighbor_id).neighbors().empty()) { continue; }
+         greedy_matching_as_graph.add_edge(node_id, neighbor_id);
+         break; // Do not add more edges incident to this node!
+      }
+   }
+   std::cout << greedy_matching_as_graph;
+   return EXIT_SUCCESS;
+}
+
diff --git a/graph.cpp b/graph.cpp
new file mode 100644
index 0000000..7926f0d
--- /dev/null
+++ b/graph.cpp
@@ -0,0 +1,181 @@
+#include "graph.hpp" // always include corresponding header first
+
+/**
+ * Note that this includes everything from the header, similar to copy-pasting it here,
+ * including our two classes, the function declarations and all the includes.
+ * In this file we actually implement the in- and output routines though,
+ * so we need to include the actual implementation of std::istream and std::ostream.
+ */
+#include <iostream>
+
+/**
+ * We are also going to use stringstream in order to treat a line,
+ * which we have already read from the input, like an input stream.
+ */
+#include <sstream>
+
+/**
+ * The stdexcept header provides the exceptions we use to terminate our program
+ * in the case of unexpected input,
+ * which is often the best way to handle such input.
+ * More complex programs may want to catch exceptions in
+ * surrounding code and either try to recover or help debug them.
+ */
+#include <stdexcept>
+
+// Anonymous name spaces may be used to show the reader
+// that a function will only be used in the current file.
+namespace
+{
+
+// Translating DIMACS ids (1-based) in a helper keeps the in- and output code readable.
+ED::NodeId from_dimacs_id(ED::size_type dimacs_node_id)
+{
+   if (dimacs_node_id == 0) // unsigned type: zero is the only non-positive value
+   {
+      throw std::runtime_error("Non-positive DIMACS node id can not be converted.");
+   }
+   ED::NodeId const node_id = dimacs_node_id - 1;
+   return node_id;
+}
+
+ED::size_type to_dimacs_id(ED::NodeId node_id)
+{
+   return 1 + node_id; // DIMACS numbering starts at 1, ours at 0
+}
+
+// Skips DIMACS comment lines (those starting with c) and returns the first other line.
+// Throws std::runtime_error if the stream ends before such a line was found.
+std::string read_next_non_comment_line(std::istream & input)
+{
+   std::string current_line;
+   for (;;)
+   {
+      if (!std::getline(input, current_line))
+      {
+         throw std::runtime_error("Unexpected end of DIMACS stream.");
+      }
+      if (current_line[0] != 'c') { return current_line; }
+   }
+}
+
+} // end of anonymous namespace
+
+namespace ED
+{
+/////////////////////////////////////////////
+//! \c Node definitions
+/////////////////////////////////////////////
+
+void Node::add_neighbor(NodeId const id)
+{
+   _neighbors.emplace_back(id); // appended unconditionally, no duplicate or self check
+}
+
+/////////////////////////////////////////////
+//! \c Graph definitions
+/////////////////////////////////////////////
+
+// Prefer the member initializer list (introduced by `:`) over assigning
+// to members inside the constructor body after they were default initialized.
+// List the members in the order in which the class body declares them,
+// since that is the order in which they are initialized anyway.
+Graph::Graph(NodeId const num_nodes)
+   : _nodes(num_nodes) // num_nodes default-constructed, i.e. isolated, nodes
+   , _num_edges{0}
+{
+}
+
+void Graph::add_edge(NodeId node1_id, NodeId node2_id)
+{
+   if (node1_id == node2_id) // Garbage input must fail explicitly and deliberately.
+   {
+      throw std::runtime_error("ED::Graph class does not support loops!");
+   }
+   // at() throws std::out_of_range for unknown ids; both lookups precede any modification.
+   Node & node1 = _nodes.at(node1_id);
+   Node & node2 = _nodes.at(node2_id);
+   node1.add_neighbor(node2_id);
+   node2.add_neighbor(node1_id);
+   ++_num_edges;
+}
+
+Graph Graph::read_dimacs(std::istream & input)
+{
+   // Parses a problem line (two words, then the number of nodes and edges) followed by one
+   // line per edge; throws std::runtime_error if a line is missing or malformed.
+   // The stream extraction functions require us to declare variables before reading them;
+   // to avoid uninitialized variables we value-initialize them by writing {} (since c++11).
+
+   // When parsing the DIMACS format, there are some words we are not interested in.
+   // We read them into this variable and never use them afterwards.
+   std::string unused_word{};
+
+   // As we need to watch out for comments, we first read the input line by line and
+   // then wrap each non-comment line in a std::istringstream to split it into variables.
+   // Variables we never modify are declared const so nobody changes them by accident.
+   std::string const first_line = read_next_non_comment_line(input);
+   std::istringstream first_buffering_stream{first_line};
+
+   size_type num_nodes{};
+   size_type num_edges{};
+   // A failed extraction would silently leave zeros behind, so fail explicitly instead.
+   if (!(first_buffering_stream >> unused_word >> unused_word >> num_nodes >> num_edges))
+   {
+      throw std::runtime_error("Malformed DIMACS problem line: " + first_line);
+   }
+
+   // Now we successively add edges to our graph;
+   Graph graph(num_nodes);
+   for (size_type i = 1; i <= num_edges; ++i)
+   {
+      // This works just as parsing the first line!
+      std::string const ith_line = read_next_non_comment_line(input);
+      std::istringstream ith_buffering_stream{ith_line};
+      size_type dimacs_node1{};
+      size_type dimacs_node2{};
+      if (!(ith_buffering_stream >> unused_word >> dimacs_node1 >> dimacs_node2))
+      {
+         throw std::runtime_error("Malformed DIMACS edge line: " + ith_line);
+      }
+      graph.add_edge(from_dimacs_id(dimacs_node1), from_dimacs_id(dimacs_node2));
+   }
+
+   return graph;
+}
+
+std::ostream & operator<<(std::ostream & output, Graph const & graph)
+{
+   // We end lines with '\n' instead of std::endl: std::endl additionally flushes the
+   // stream on every line, which is needlessly slow when printing large graphs.
+   output << "c Recall each line starting with c encodes a comment in DIMACS format!" << '\n'
+          << "c The first non-comment line specifies the number of nodes and edges:" << '\n'
+          << "p edge " << graph.num_nodes() << " " << graph.num_edges() << '\n'
+          << "c Each of the remaining non-comment lines specifys an edge by two nodes:" << '\n';
+
+   // We will need the id of the node we are at, so we write a plain old loop here.
+   for (NodeId node_id = 0; node_id < graph.num_nodes(); ++node_id)
+   {
+      Node const & node = graph.node(node_id);
+      // We do not need to keep track of the index neighbor_id has in node,
+      // so we can use the range-based for loop introduced in c++11.
+      for (NodeId const neighbor_id : node.neighbors())
+      {
+         // Note we iterate over each edge two times, so we use the following
+         // comparison to check if the edge was not yet written to output!
+         if (node_id < neighbor_id)
+         {
+            output << "e " << to_dimacs_id(node_id) << " " << to_dimacs_id(neighbor_id) << '\n';
+         }
+      }
+   }
+   output << "c If you use this graph class in your solution, you should probably remove this comments!" << '\n';
+
+   // Streams usually buffer their output.
+   // Once one is done with some output routine, it can make sense to flush them,
+   // which writes out whatever is still sitting in the buffer.
+   output << std::flush;
+   return output;
+}
+
+} // namespace ED
diff --git a/graph.hpp b/graph.hpp
new file mode 100644
index 0000000..129acce
--- /dev/null
+++ b/graph.hpp
@@ -0,0 +1,211 @@
+// A so-called include guard uses the preprocessor to make sure nothing happens when
+// this header is included a second time. This becomes important if there are many headers
+// including each other, as undirected cycles can usually not be avoided.
+#ifndef GRAPH_HPP
+#define GRAPH_HPP
+
+/**
+   @file graph.hpp
+
+   @brief This file provides a simple class @c Graph to model unweighted undirected graphs.
+**/
+
+/**
+ * In- and output in the standard library is done using streams.
+ * In this header we only need to know std::istream and std::ostream are classes,
+ * since we only declare the functions which read and write our graph
+ * from an std::istream or to an std::ostream, so we only include the forward declaration.
+ */
+#include <iosfwd>
+
+/**
+ * This header defines many different integer types,
+ * enabling us to choose what integers we want to use.
+ */
+#include <cstdint>
+
+/**
+ * Limits are provided by the standard library to check
+ * e.g. if some value can be represented in a certain integer type.
+ */
+#include <limits>
+
+/**
+ *  Vectors are implemented in the standard library as std::vector.
+ *  They encapsulate an array of dynamic size, 
+ *  so that you don't have to know about the exact implementation.
+ *  If you add an element in the end (aka push_back) but the dynamic array is full,
+ *  it will automatically be resized.
+ *  See https://en.cppreference.com/w/cpp/container/vector for documentation.
+ */
+#include <vector>
+
+/**
+ * Namespaces can be used in order to make sure different modules,
+ * possibly implemented by different people don't have classes/functions/...
+ * with the same name. If you want to refer to some symbol S,
+ * which is defined in a namespace N from outside of that namespace,
+ * you need to write N::S.
+ * The most prominent example is std, the namespace used by the standard library.
+ * This namespace is intended to be used for Edmonds.
+ * */
+namespace ED
+{
+
+/**
+ * Using names for types has many advantages.
+ * One of them is being able to switch type with very little effort.
+ * For now, we are going to use unsigned (i.e. non negative) 32 bit integers
+ * for all sizes and indices (std:: qualified, as <cstdint> only guarantees that name).
+ * But if there was some large graph for which we need 64 bit indices,
+ * we would only need to change the type once, right here!
+ */
+using size_type = std::uint32_t;
+
+/**
+ * Another advantage of naming types is making your code more readable.
+ * For example an Id is usually a light weight object (read: few bits)
+ * which uniquely determines some object, in this case a node in our graph.
+ * Note the same Id may be used by different graphs though!
+ */
+using NodeId = size_type;
+
+/**
+   @class Node
+
+   @brief A @c Node stores an array of neighbors (via their ids).
+
+   @note The neighbors are not necessarily ordered, so searching for a specific neighbor takes O(degree)-time.
+**/
+class Node
+{
+public:
+   /** @brief Create an isolated node (neighbors can only be added later, through the befriended @c Graph). **/
+   Node() = default;
+
+   /** @return The number of neighbors of this node; a neighbor that was added repeatedly counts repeatedly. **/
+   size_type degree() const;
+
+   /** @return The array of ids of the neighbors of this node, in the order in which they were added. **/
+   std::vector<NodeId> const & neighbors() const;
+
+private:
+   // Befriending Graph allows each Graph to access the private members of this class,
+   // in our case the add_neighbor function.
+   friend class Graph;
+
+   /**
+      @brief Appends @c id to the list of neighbors of this node.
+      @warning Does not check whether @c id is already in the list of neighbors (a repeated neighbor is legal, and
+      models parallel edges).
+      @warning Does not check whether @c id is the identity of the node itself (which would create a loop!).
+   **/
+   void add_neighbor(NodeId const id);
+
+   std::vector<NodeId> _neighbors; // ids of the neighbors, appended to by add_neighbor
+}; // class Node
+
+/**
+   @class Graph
+
+   @brief A @c Graph stores an array of @c Node s, but no array of edges. The list of edges is implicitly given
+   by the fact that the nodes know their neighbors.
+
+   This class models undirected graphs only (in the sense that the method @c add_edge(node1, node2) adds both @c node1
+   as a neighbor of @c node2 and @c node2 as a neighbor of @c node1). It also forbids loops, but parallel edges are
+   legal.
+
+   @warning Nodes are numbered starting at 0, as is usually done in programming,
+    instead of starting at 1, as is done in the DIMACS format that your program should take as input!
+    Be careful.
+**/
+class Graph
+{
+public:
+   /**
+      @brief Creates a @c Graph with @c num_nodes isolated nodes.
+
+      The number of nodes in the graph currently cannot be changed. You can only add edges between the existing nodes.
+   **/
+   Graph(NodeId const num_nodes);
+
+   /** @return The number of nodes in the graph. **/
+   NodeId num_nodes() const;
+
+   /** @return The number of edges in the graph, i.e. the number of successful calls of @c add_edge. **/
+   size_type num_edges() const;
+
+   /**
+      @return A reference to the id-th entry in the array of @c Node s of this graph (@c id is not range-checked).
+   **/
+   Node const & node(NodeId const id) const;
+
+   /**
+      @brief Adds the edge <tt> {node1_id, node2_id} </tt> to this graph.
+
+      Checks that @c node1_id and @c node2_id are distinct and throws an exception otherwise.
+      This method adds both @c node1_id as a neighbor of @c node2_id and @c node2_id as a neighbor of @c node1_id.
+
+      @warning Does not check that the edge does not already exist, so this class can be used to model non-simple graphs.
+   **/
+   void add_edge(NodeId node1_id, NodeId node2_id);
+
+   // Static functions are not called on an object of the class, but on the class itself.
+   /**
+      @brief Reads a graph in DIMACS format from @c str; throws std::runtime_error if the stream ends too early.
+   **/
+   static Graph read_dimacs(std::istream & str);
+   /**
+     @brief Prints the graph to the given ostream in DIMACS format.
+   **/
+   friend std::ostream & operator<<(std::ostream & str, Graph const & graph);
+private:
+   std::vector<Node> _nodes; // one entry per node, indexed by NodeId
+   size_type _num_edges; // incremented once per added edge
+}; // class Graph
+
+// Calling a function usually has some constant time overhead.
+// The compiler is capable of "inlining" function calls,
+// which means when your code calls this function,
+// the compiler will instead insert the content of the function.
+// This has no effect on your code, but will get rid of this overhead.
+// The inline keyword recommends the compiler to inline a function.
+// If you use it for some function, you must implement that function
+// in the header! For readability, we put all implementations
+// of inline functions into the following inline section.
+//BEGIN: Inline section
+
+// The degree is the length of the neighbor array.
+inline size_type Node::degree() const
+{
+   return static_cast<size_type>(_neighbors.size());
+}
+
+// Read-only access to the stored neighbor ids; nothing is copied.
+inline std::vector<NodeId> const & Node::neighbors() const
+{
+   return this->_neighbors;
+}
+
+// The node array holds one entry per node, so its length is the number of nodes.
+inline NodeId Graph::num_nodes() const
+{
+   return static_cast<NodeId>(_nodes.size());
+}
+
+// Edges are not stored explicitly, so we report the counter kept alongside the nodes.
+inline size_type Graph::num_edges() const
+{
+   return this->_num_edges;
+}
+
+inline Node const & Graph::node(NodeId const id) const
+{
+   Node const & requested_node = _nodes[id]; // plain array access, id is not range-checked
+   return requested_node;
+}
+//END: Inline section
+
+} // namespace ED
+
+#endif /* GRAPH_HPP */